diff --git a/config.json b/config.json index e9c650ad281bc356f121ede5853eefb749f07faa..f00fabfedadd9d794a22d5ff97b6ecf6c35d024e 100644 --- a/config.json +++ b/config.json @@ -2,52 +2,15 @@ "architectures": [ "ArcticForCausalLM" ], - "attention_dropout": 0, "auto_map": { "AutoConfig": "configuration_arctic.ArcticConfig", "AutoModel": "modeling_arctic.ArcticModel", "AutoModelForCausalLM": "modeling_arctic.ArcticForCausalLM", "AutoModelForSequenceClassification": "modeling_arctic.ArcticForSequenceClassification" }, + "attention_dropout": 0, "bos_token_id": 31998, "enable_expert_tensor_parallelism": false, - "enc_index": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30, - 31, - 32, - 33, - 34 - ], "eos_token_id": 31999, "hidden_act": "silu", "hidden_size": 7168, @@ -64,7 +27,7 @@ "num_attention_heads": 56, "num_experts_per_tok": 2, "num_hidden_layers": 35, - "num_key_value_heads": 56, + "num_key_value_heads": 8, "num_local_experts": 128, "parallel_attn_mlp_res": true, "quantization": null, @@ -74,7 +37,7 @@ "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", - "transformers_version": "4.39.0.dev0", + "transformers_version": "4.40.0.dev0", "use_cache": true, "use_residual": true, "vocab_size": 32000 diff --git a/generation_config.json b/generation_config.json index 318edbdda130a1772ce667101a553bd954682809..c6dbe7346c664254b701306e478e5062443975e8 100644 --- a/generation_config.json +++ b/generation_config.json @@ -2,5 +2,5 @@ "_from_model_config": true, "bos_token_id": 31998, "eos_token_id": 31999, - "transformers_version": "4.39.0.dev0" + "transformers_version": "4.40.0.dev0" } diff --git a/model-00001-of-00194.safetensors b/model-00001-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c68e7380e894499f6d8da43bcff7af957c990cba --- /dev/null +++ b/model-00001-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b6b3b661afba0f1d3c81c6a13d33c32a8970671e29c167bef5eb880891d36cb +size 4949025248 diff --git a/model-00002-of-00194.safetensors b/model-00002-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffd40f99a4ba06233e57c0d727df27198f91ab6d --- /dev/null +++ b/model-00002-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffaaea90765ed446108d6ad626823776ab1219c4e1193c1fe62bfc6829de3003 +size 4950860888 diff --git a/model-00003-of-00194.safetensors b/model-00003-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8d92bebf8dadbdc07d88a28b5c929fb358ba361 --- /dev/null +++ b/model-00003-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:525b2c3078d389c0165319d76134c9b09df53dba33fd28512b6a6f84a320ba59 +size 4950860888 diff --git a/model-00004-of-00194.safetensors b/model-00004-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7fb5b68d203547c8376dfeb76424430fca12912 --- /dev/null +++ b/model-00004-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d6942a6252289ff0c875005c20bd4ae03afc459207d6d645105ad521085d246 +size 4950860888 diff --git a/model-00005-of-00194.safetensors b/model-00005-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b3aa6f1ab9653237736925f8204419c6f2032a8 --- /dev/null +++ b/model-00005-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:780b63b65fe941603685c2d77e41e7741ca44b0c50ef063b96fc4afa843fc6f1 +size 4950860936 diff --git a/model-00006-of-00194.safetensors b/model-00006-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b2c06428ccae1c7de93b19af6f4e3291d299e4b --- /dev/null +++ b/model-00006-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99679d8e27845bb97ca6b0584cc397a35f1adcd55697417e4daea06560b3e137 +size 4938059088 diff --git a/model-00007-of-00194.safetensors b/model-00007-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67cd13c75b0f46b0f88f30c63d931d7d598f8887 --- /dev/null +++ b/model-00007-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a9d980f33b215cf1c8a0e4c1efaa504bb5891b871c8d208971e7f651c3b77e +size 4950860880 diff --git a/model-00008-of-00194.safetensors b/model-00008-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c92a00e5633917a76581b7c9ebd0023cf8d04cf0 --- /dev/null +++ b/model-00008-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c597884b4fdd066ad311cc71d4935ba8c16b806e892f4252b55c60877ffe81c6 +size 4950860888 diff --git a/model-00009-of-00194.safetensors b/model-00009-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b0aa496cea6283ae0bc59adc06069b8475c1d29 --- /dev/null +++ b/model-00009-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2e115943add33c745b7d05721c3a18ae343c92c51bf10d236ee2f9ffff6df88 +size 4950860888 diff --git a/model-00010-of-00194.safetensors b/model-00010-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41df1427ddebb9c79db1688fc1007fe0277ab9c1 --- /dev/null +++ b/model-00010-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a7c99a6e28c3f34639efff064cd7b71e28848c383477b4a358d08bf3beb017 +size 4950860896 diff --git a/model-00011-of-00194.safetensors b/model-00011-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..edbbede563570254b34fb9bf074c60e372893133 --- /dev/null +++ b/model-00011-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34cc861d96f05c3c23629acf102e78c968f6729b5fb02ef7fe11b6ffad90f894 +size 4950860960 diff --git a/model-00012-of-00194.safetensors b/model-00012-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..785d7fc3f3eaed59e0f023ac41621b88549bc909 --- /dev/null +++ b/model-00012-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d7f34b93e8285791dcda9e4fb62520771619026294bf09018140092a0e920ce +size 4938059048 diff --git a/model-00013-of-00194.safetensors b/model-00013-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb4284ea3bd47fd5c0f622db491fb286eaf84c31 --- /dev/null +++ b/model-00013-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81da7d218661db4ba306fe3edb511e651fcc45ff54c92413c8b92fcb17215e3 +size 4950860888 diff --git a/model-00014-of-00194.safetensors b/model-00014-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a6c5c1e4c4cd23f52e58728983a78b777081182 --- /dev/null +++ b/model-00014-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827e4bac4c5f8a8bbece1b90b576c1a9f016898f6dc74e31f101d8a8640ae2ad +size 4950860888 diff --git a/model-00015-of-00194.safetensors b/model-00015-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ace1e223b74a40943d4bfd49e282fc7af0f1c002 --- /dev/null +++ b/model-00015-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b18a5d47f134c2d7a86b043c6ff1eabbd63ba50a14d931cc2b37a3090f90531 +size 4950860888 diff --git a/model-00016-of-00194.safetensors b/model-00016-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33d5b4e736f2997a1ef197a7e2557b95d2b7afdb --- /dev/null +++ b/model-00016-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3600ee555b020f99e6de2aa2922793680dc3037dc0f273bcd5683daa06a39cdf +size 4950860928 diff --git a/model-00017-of-00194.safetensors b/model-00017-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91049d174922b5fbf9d55e3b8cb2c94de33cf635 --- /dev/null +++ b/model-00017-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5474d2f5c6ee898006bc2e6a6c9d8cbd97719b15f9c9d8120daa472f80595ce7 +size 4938059096 diff --git a/model-00018-of-00194.safetensors b/model-00018-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1eb0de48251dcfaa1f55f7a71a07a142c269ff76 --- /dev/null +++ b/model-00018-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29beb9e4e5a7098e3e88f1df935bfc0fa7286bc5883878656a0e09fd8df24496 +size 4950860880 diff --git a/model-00019-of-00194.safetensors b/model-00019-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06bbe41dc21e3f54e6ad26074d13b1bbe1eff62f --- /dev/null +++ b/model-00019-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0718a1ba6e333a06449c8a5b4edb4b2a2e085bf7d38f149efd642a5d84ed5f5 +size 4950860888 diff --git a/model-00020-of-00194.safetensors b/model-00020-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0737f7c3cab5f98d2173aa321a9970a29f8fdcb --- /dev/null +++ b/model-00020-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c8ee3cf56f1a66545d335af2686aec656417b8881b7411e7011a9280acc588f +size 4950860888 diff --git a/model-00021-of-00194.safetensors b/model-00021-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a33daf360ab1e5e6de0d65a157642bef1e5515ed --- /dev/null +++ b/model-00021-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30a965babc1a7608f382b22dd5b570e1f6ccb4b0d9c1aab9928751f841b40043 +size 4950860896 diff --git a/model-00022-of-00194.safetensors b/model-00022-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21d4d474b9f497a9a528c257f0f58704ddcbd302 --- /dev/null +++ b/model-00022-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcd42bb7296c3da84e22799f40cf2eaa4311e9c5d0d30da8f7f29a16a3ed5765 +size 4950860960 diff --git a/model-00023-of-00194.safetensors b/model-00023-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78183ad1062dab86d96a81268bbfe03380ac47e3 --- /dev/null +++ b/model-00023-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5073d62eebf9a4ae235eb416b742da8e22955596008c7274984d6f6e40c43c1a +size 4938059048 diff --git a/model-00024-of-00194.safetensors b/model-00024-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..543017acdf6fff921a8836f1a0d2c9735b7577e9 --- /dev/null +++ b/model-00024-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21e27ec46a78f76166d67c1af957ef959b7d037844c32a8378d6ff9e131de613 +size 4950860888 diff --git a/model-00025-of-00194.safetensors b/model-00025-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af450ae3f67f22e5222a57b55708b777050e05ce --- /dev/null +++ b/model-00025-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a415586e5b318ebd6a2296e1f67743b2887effaf1e5b28db0879b8b03aa4fa +size 4950860888 diff --git a/model-00026-of-00194.safetensors b/model-00026-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b2a7970d04e904692079e370307761d63854c0f --- /dev/null +++ b/model-00026-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f4fbe2327ccf34d547e934656bd6306fdf692ed0a46fbc67929282f852abb5f +size 4950860888 diff --git a/model-00027-of-00194.safetensors b/model-00027-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e47ed169bc94fa0af98f9518f7dc7bdd74558126 --- /dev/null +++ b/model-00027-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56331dc2e68347f0ca177092e5cfc4a5ee6d9f988eedccf8bc2e743e5de8eb88 +size 4950860928 diff --git a/model-00028-of-00194.safetensors b/model-00028-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9fde301c0d7683d382ba60c4732ca06ae404e7e3 --- /dev/null +++ b/model-00028-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30174a3c3bab79cb51d17cdc9d091bd344652d2eeb86562e91980def55da1ceb +size 4938059104 diff --git a/model-00029-of-00194.safetensors b/model-00029-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ed5a8554c556dd605aba34746266a2fd614e6bc --- /dev/null +++ b/model-00029-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da0d273a14bde6d01539198cab2b52242e50fc983ecc8e9073da0541c3436f90 +size 4950860880 diff --git a/model-00030-of-00194.safetensors b/model-00030-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55332c9eb4fc7777d7723f8f0181001676068fc3 --- /dev/null +++ b/model-00030-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1ab38c54bbe674cb018278e495df349ce92af4e572b6a6876d5797620849fc5 +size 4950860888 diff --git a/model-00031-of-00194.safetensors b/model-00031-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39d2b589311928c0f0c45af5542bef8a1a750c23 --- /dev/null +++ b/model-00031-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2cf2ba3e32da0c033e789430ecc3540abdeea590db8a68033fa56b6a65a268d +size 4950860888 diff --git a/model-00032-of-00194.safetensors b/model-00032-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6eaac924e1b86ffa63a21b56bb813563e7b892f4 --- /dev/null +++ b/model-00032-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6b94ab3e354c934c92c54269e533c1bd10919a2a1cacbcf4263d683f549d96 +size 4950860888 diff --git a/model-00033-of-00194.safetensors b/model-00033-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fee48eaea6f5d139b7302630bcdb11ae52cef054 --- /dev/null +++ b/model-00033-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e092aa7afdd2723f72336a4aa06f63aae009408f65242015aa328546dc05eda1 +size 4950860960 diff --git a/model-00034-of-00194.safetensors b/model-00034-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..457bf51e91371afb959e1d7fe20b955528325ba2 --- /dev/null +++ b/model-00034-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:806269f07bf9d80b1b14dd3b040eaa5684d0beab4557495d45cc7db907746e6c +size 4938059056 diff --git a/model-00035-of-00194.safetensors b/model-00035-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d8dfc39c018f1042e99e51115642ae37dcd6f3f --- /dev/null +++ b/model-00035-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5ce77a3e8bf4c046e10d91b78b3aee5154b776b08ea7db0ffe1849120c531cc +size 4950860888 diff --git a/model-00036-of-00194.safetensors b/model-00036-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9d3ad6e6a2b23a5807fbe59c9e32e0cdb8a0738 --- /dev/null +++ b/model-00036-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16d295f0c0b88078ef98d190282ca3263fd4cfc6cd636609bbcd98cbf8a08b73 +size 4950860888 diff --git a/model-00037-of-00194.safetensors b/model-00037-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56be08c1fbc129a381e92967696776cd35ba9961 --- /dev/null +++ b/model-00037-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d454f57c7b962680f42bd36de7319f45027418f5b4e843093a8d3bf604c5ad +size 4950860888 diff --git a/model-00038-of-00194.safetensors b/model-00038-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71d29e88ae7829ece54401c1032a9527c4e6f9e7 --- /dev/null +++ b/model-00038-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a1d1d33c0842d7c5635187b9e29a3f15fe3c7e1548190577e0c5e7238d19eed +size 4950860928 diff --git a/model-00039-of-00194.safetensors b/model-00039-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6b23397466db8c13f0ad8fe9331f8581d3ae8f8 --- /dev/null +++ b/model-00039-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dddc462bc6c106ad44dbd184a4f6e3b193d030f20f0b5219dbdc14f167f37963 +size 4938059112 diff --git a/model-00040-of-00194.safetensors b/model-00040-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..751f35c69f91920086b20c5f3f6fb07f32101fb0 --- /dev/null +++ b/model-00040-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:835a041b2ccf38998226ea7a1903690ecfbb0edd9b786c7b9cadc98c2f376ec3 +size 4950860872 diff --git a/model-00041-of-00194.safetensors b/model-00041-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0200d6d457391b5c23d67388a631807cab6f6e0b --- /dev/null +++ b/model-00041-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c159ad01ad2be59a1d3631109ce57d22a2d16cf8e3f2c3aa0358958a5dd263 +size 4950860888 diff --git a/model-00042-of-00194.safetensors b/model-00042-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5cae02a9301ed52be6315000357f7f0127fc7b5f --- /dev/null +++ b/model-00042-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a89fb921c670bc7dc8b1d94ffd19f3c20a14fe7613df5e25f577431b5841e424 +size 4950860888 diff --git a/model-00043-of-00194.safetensors b/model-00043-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b311a98c84e70c4f9ef861d4fd18cee45fb130d3 --- /dev/null +++ b/model-00043-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3417b3e2c641ce925c44ffdeece5772c7ce832e164d237009e8c2360a5faf000 +size 4950860888 diff --git a/model-00044-of-00194.safetensors b/model-00044-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a96bb8d0fe709c1a8e6f3cd31fea9014700bf0d6 --- /dev/null +++ b/model-00044-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff844aaae7961c7240a0e260bcfa81975cd61b53c068ce4a5ba085500eaee6c +size 4950860960 diff --git a/model-00045-of-00194.safetensors b/model-00045-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04e80386b8f04101848636c63b321b8bdf70d557 --- /dev/null +++ b/model-00045-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f22daa86c78fd393a6213889079ff588e6db1988c854e7b3793e412e3cddf7 +size 4938059056 diff --git a/model-00046-of-00194.safetensors b/model-00046-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1233b5568ea91bd712e5d77a7144551929b4f4a2 --- /dev/null +++ b/model-00046-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57a3680f7fb685e2e20165c5eca5299c9fa168ebe09733a767903509b371c11 +size 4950860888 diff --git a/model-00047-of-00194.safetensors b/model-00047-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c77af0faa19444b67ac69143e25c06cf42e4e384 --- /dev/null +++ b/model-00047-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:106f323c866eaf6b29597d22975e3a42605555d6716013aa27faa2ee40a4082a +size 4950860888 diff --git a/model-00048-of-00194.safetensors b/model-00048-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20fc46d744a1c81b4042bf385d94fa2f5e35cc9c --- /dev/null +++ b/model-00048-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea2d67f0cb60ee3a0d8607fef879185339db9416aa6e1b7b8bd231a378c3c81 +size 4950860888 diff --git a/model-00049-of-00194.safetensors b/model-00049-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec49d9764528a6cb15d4713c8c576fc4be078eea --- /dev/null +++ b/model-00049-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acc2750e88ed909f473e96f4eaf4239490217d982e22b56bc9f5ed9ed0ef1461 +size 4950860920 diff --git a/model-00050-of-00194.safetensors b/model-00050-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30563529a191d456c05c9382a5ce08b2ec4c313e --- /dev/null +++ b/model-00050-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48528bc718492115a29ea44673edf3ee897e684ff1987e6e98f897c540302c0d +size 4938059112 diff --git a/model-00051-of-00194.safetensors b/model-00051-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b37fcf205ae23e5d599e164e116407675fa1420b --- /dev/null +++ b/model-00051-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3483c0a9a4685aeaf074a98ca207d6657930dc93e79ae353300824b5cfaf7d5b +size 4950860872 diff --git a/model-00052-of-00194.safetensors b/model-00052-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f511b76199e2c206635cba80dc7ebf94e46e7f0 --- /dev/null +++ b/model-00052-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39fdffd8fe925ee05147f16591abe01d41dbb8cf92958d8825cd6abc80763b99 +size 4950860888 diff --git a/model-00053-of-00194.safetensors b/model-00053-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..046d7ad0c9cf49f05ab799095dfceb62a4e93e6e --- /dev/null +++ b/model-00053-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd04c5d07507bec4d62e3b8282579f4babd78a2ff558e7857ee1b89b69ed8049 +size 4950860888 diff --git a/model-00054-of-00194.safetensors b/model-00054-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ed477b230278acbb9dfa5b2148da3c4f4f395c1 --- /dev/null +++ b/model-00054-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ac4a6b854bcca1aaa7514c4c5fe9ddab39b4a958888125e2c21bcc268c42bb +size 4950860888 diff --git a/model-00055-of-00194.safetensors b/model-00055-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9771921fb676e4f5badd9dacec7f39af9c4922e0 --- /dev/null +++ b/model-00055-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b609d3877d204293f475f69b893012db7c9df60d0db114bf05f979d0d2af9fe1 +size 4950860960 diff --git a/model-00056-of-00194.safetensors b/model-00056-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4ea8786e994e8e317d58e78eff20f50dfa593c2 --- /dev/null +++ b/model-00056-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6c01453bd31faf6cea090b8a605485a49c3a6c6fc5a271f5179e61653e6a264 +size 4938059112 diff --git a/model-00057-of-00194.safetensors b/model-00057-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b903705fbd373ed42549665d8d01c641fb3767c6 --- /dev/null +++ b/model-00057-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:902cac972f800aaca62c1f9d5ab32a6b3e3fd5f7d4d59dcc09c196b202d767e3 +size 4950860960 diff --git a/model-00058-of-00194.safetensors b/model-00058-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7deb072c6bb125cb43856c6f3ae7f3595bf5851 --- /dev/null +++ b/model-00058-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50e08922bc74638d1aeb82df07732f70502aa29c92241749ca68c50b29cdd92e +size 4950860960 diff --git a/model-00059-of-00194.safetensors b/model-00059-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cb8938221babb389345df2c4a3c29626bda2c8f --- /dev/null +++ b/model-00059-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291109fc796fba06db16ee09e866f85924eb05bc441928bbb519393b1c1882dc +size 4950860960 diff --git a/model-00060-of-00194.safetensors b/model-00060-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0bb418a36fa76640ac27af0980a2350132efd8c1 --- /dev/null +++ b/model-00060-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e09a3add8ab0f00cfc790ad98604d3062f18dd725946a729f03784074c450f2d +size 4950860992 diff --git a/model-00061-of-00194.safetensors b/model-00061-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0882502d749cbcfbab90d4a5f58904843e1d736 --- /dev/null +++ b/model-00061-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155df067cec8f3fd8bccb06a98e8bad4f15f6379b88a054f3009f6ed4cd82bc6 +size 4938059192 diff --git a/model-00062-of-00194.safetensors b/model-00062-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52c3456355480f90361c3a20d09a53ea46b981d6 --- /dev/null +++ b/model-00062-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85f896ef7bbc81e50360bf41fa2509cf37eec6dea39f0d3d8392e994a1f7e99c +size 4950860936 diff --git a/model-00063-of-00194.safetensors b/model-00063-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33f19593a4a0c838b009e8c5417b6bd18af9100b --- /dev/null +++ b/model-00063-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d99b3364c7b491f6ad3e5d1fd8d40a317eb241a4c4593a976c9ed296fe262ff +size 4950860960 diff --git a/model-00064-of-00194.safetensors b/model-00064-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77d92272ef59e6cacbd7a132bb0977635365df98 --- /dev/null +++ b/model-00064-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:542ee15e371a4dc585e3d3d9907bd4d02b4463e3d792af320f3ea720c698ebc9 +size 4950860960 diff --git a/model-00065-of-00194.safetensors b/model-00065-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92c0a0cdf5db8ac0eb7afa33eeb5d703b73deb43 --- /dev/null +++ b/model-00065-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e5887202278f82f7ca7ee8c7c376e57d03e997605638bfbbcd846e28269def8 +size 4950860960 diff --git a/model-00066-of-00194.safetensors b/model-00066-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1bf04dc78bc6633507a9bc08cd63e3fc00b7bbe --- /dev/null +++ b/model-00066-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eca9924032d0da949f6893d9b33a52e710c39c622ac2bc74e31abb6fb20c2e17 +size 4950861024 diff --git a/model-00067-of-00194.safetensors b/model-00067-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f960b15bce5464fec7bae6c4230c6a3edd435d7f --- /dev/null +++ b/model-00067-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b56e621a38749a9a97ef2cb3165a15673851cc3bd6b3b2c22540670228b15c3 +size 4938059136 diff --git a/model-00068-of-00194.safetensors b/model-00068-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf082caa71871ef1ebcfb0d2c8405697267c037f --- /dev/null +++ b/model-00068-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39c4220c35f0de8a125fa4e59f9501740757e09a741a2ac2ea380cb4d774ab76 +size 4950860960 diff --git a/model-00069-of-00194.safetensors b/model-00069-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86f4f785229e3a2f3859b3f4386a488d35eeea88 --- /dev/null +++ b/model-00069-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19a11a633ca218f52333c0674ee3e7610474240100689db954bd102b08b952c +size 4950860960 diff --git a/model-00070-of-00194.safetensors b/model-00070-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7ea6f3c7433d60469906cb8e0d240cca08de3ec --- /dev/null +++ b/model-00070-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf6754925bacef49a6371e6467798873e07e2d90fcedb99a83fdafee6e8b695 +size 4950860960 diff --git a/model-00071-of-00194.safetensors b/model-00071-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40cd270c72b1b13f5fb7f1d52d5c79e8afff039b --- /dev/null +++ b/model-00071-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54645b1cd0e931f18391b1942bb43e184afdd3d3fa2bcbc7b8d1eeafc3b19aea +size 4950860984 diff --git a/model-00072-of-00194.safetensors b/model-00072-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89570e1919023b559ee6d164b1b4076117d77434 --- /dev/null +++ b/model-00072-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896f3038684213b11f7a410aa8bea35e15de97d9d87d17f3f44f0726cdc197ab +size 4938059200 diff --git a/model-00073-of-00194.safetensors b/model-00073-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17dd6a20d352e78f010e1f73ae17d27964139650 --- /dev/null +++ b/model-00073-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5cf0e9bfb10da44a198af8f17fc4f385428576f8680b75024fa2a0fdef93664 +size 4950860936 diff --git a/model-00074-of-00194.safetensors b/model-00074-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ab8c1cfdb6507b1a8348ac3ceab016c02a71b33 --- /dev/null +++ b/model-00074-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4254d083107d94acaed98d69ce07bb8a8bdd3400a1e36c87996f941ba679756 +size 4950860960 diff --git a/model-00075-of-00194.safetensors b/model-00075-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c37ca30a5764c193b575f18fa691a9ad62e91154 --- /dev/null +++ b/model-00075-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa53f3fa2626de35cdfa18ac8c7594b9f04083edcc8edf669feb630796e9e17b +size 4950860960 diff --git a/model-00076-of-00194.safetensors b/model-00076-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a479eb48b543ada78f99c165b61f1725a7cf0b9e --- /dev/null +++ b/model-00076-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0776d7adbd33400da9f0caa99e9566895916327d3e53a48d4bb6f0aa1bb8d760 +size 4950860960 diff --git a/model-00077-of-00194.safetensors b/model-00077-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e11210db5cb95aeb235e8b97fff7624953ebb0d --- /dev/null +++ b/model-00077-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f4db1f59c27091e95e17ff309309c2f66909c062d564c3123d4cc98d5cb5cc7 +size 4950861024 diff --git a/model-00078-of-00194.safetensors b/model-00078-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2511b5455efa9e88cb12ca4dc53cf86b2db85b3 --- /dev/null +++ b/model-00078-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cf9c257284ed2fd2fc87fca876fcdda16a62c585af64946b0f820f8430dc408 +size 4938059144 diff --git a/model-00079-of-00194.safetensors b/model-00079-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f7f6281c1a0a93bf57ce128d418b0d962140c03 --- /dev/null +++ b/model-00079-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fdca975cd215e5f8f638d1204b87c1f180ee671ef5448272b2b0a56bcb129d7 +size 4950860960 diff --git a/model-00080-of-00194.safetensors b/model-00080-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c11bbe50785803f484eca71de10f1e55d826a254 --- /dev/null +++ b/model-00080-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac6d017dc58541120a0c3fa1232ec4e452ba9ab13de439d22f448c49789af46e +size 4950860960 diff --git a/model-00081-of-00194.safetensors b/model-00081-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fe0dc5521820c794a5d70422d5d19fe64125073 --- /dev/null +++ b/model-00081-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe5ea23ba4fdadeddea78884e6d03523122e88ace1f5e13f704e1ce97c09a521 +size 4950860960 diff --git a/model-00082-of-00194.safetensors b/model-00082-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f73c9bab24a9ed415cfff760590ecd21bc7856a4 --- /dev/null +++ b/model-00082-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b28ca165a89f069c820f8f28aa883e5441d49ba1a75cf3e1b1f3592bc298d192 +size 4950860984 diff --git a/model-00083-of-00194.safetensors b/model-00083-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d3b15af4ec034998afca47f7fc2772f7e5458a2 --- /dev/null +++ b/model-00083-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d9284e4f6f2779ee516a3e3782bed832e3b238d78054696ba2ed04199debe93 +size 4938059208 diff --git a/model-00084-of-00194.safetensors b/model-00084-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b14db9d071b3f483027abc21398338d874c61c06 --- /dev/null +++ b/model-00084-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e888612c589ed34aa42a3c01a886e5ce6945238fe4aa09974fd9bfe2704f8d7 +size 4950860936 diff --git a/model-00085-of-00194.safetensors b/model-00085-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34556ca1c055e2d8257a61df6c33aaa39b5400e9 --- /dev/null +++ b/model-00085-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfe00659ecc47fe6a21aea3821abdb13dfac205f4c51b8190adb0a04af73ffbb +size 4950860960 diff --git a/model-00086-of-00194.safetensors b/model-00086-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d033b19b144cbb35a0233ab549a2c8a6f86f6f7 --- /dev/null +++ b/model-00086-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b14a7ed429483e96705a627fa10806c349d518fb11e630b0b67b3134073ee1b +size 4950860960 diff --git a/model-00087-of-00194.safetensors b/model-00087-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb80358bcc2258e949e584ae882d00f63567a23e --- /dev/null +++ b/model-00087-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07dc1e0266329dac0e33042bd13f1bd0daa08c9b44a629b40797e1e11a3e5a56 +size 4950860960 diff --git a/model-00088-of-00194.safetensors b/model-00088-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b3d6e96818f55ed244f6cc1bfc29f088c3a932c --- /dev/null +++ b/model-00088-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c1a9a3d8ba194a9f64a6b69963f61aa377be885e3993b291b025474f553cc85 +size 4950861016 diff --git a/model-00089-of-00194.safetensors b/model-00089-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7c629628b38be90536bf0a51da4ab441905e0a6 --- /dev/null +++ b/model-00089-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fd78b137aaa19366e811df191fa4dbbfab641951e365634824e774adfd39034 +size 4938059144 diff --git a/model-00090-of-00194.safetensors b/model-00090-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2695ffecc6c36200538c9ead78aed1c59e57a675 --- /dev/null +++ b/model-00090-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9244ed9ecb24101c2fc4cf0ffdeaffbcdf0fde852041888d30dd73143d3f5e +size 4950860960 diff --git a/model-00091-of-00194.safetensors b/model-00091-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..123901e6c5aee4690433fb621d9b546f43ecf2c2 --- /dev/null +++ b/model-00091-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d2c3462e4b620dec75eda2836c895b3bda721095ede3769daff7e3f0973ec3 +size 4950860960 diff --git a/model-00092-of-00194.safetensors b/model-00092-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f03f27134375864a00c38f83497041b4f89f3fb --- /dev/null +++ b/model-00092-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d5c77985ee32b269e14b86d3a84315d9008daee7e257d94f5429ddbfbaa474 +size 4950860960 diff --git a/model-00093-of-00194.safetensors b/model-00093-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7912b0e7816940e8bb7edf1d22b0f8b00182920 --- /dev/null +++ b/model-00093-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24084f6ff481e8120e69ce8c66112689bbd85298567be980123226bc6e8a82b9 +size 4950860984 diff --git a/model-00094-of-00194.safetensors b/model-00094-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd56dfb7d6d9a7aa6bcf900389bda57af143f4f8 --- /dev/null +++ b/model-00094-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf0bb3904eae06e92b2e02245b8e9c714cab3e3cf095dc682130c81ae8e389e6 +size 4938059216 diff --git a/model-00095-of-00194.safetensors b/model-00095-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..038348d8a1d6a8275de95ecdbae452b1fa7c45c0 --- /dev/null +++ b/model-00095-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2164a3a5af0f1c9ac3612e306b887abd863a9371e5ca851d050e6744fd57ed7 +size 4950860928 diff --git a/model-00096-of-00194.safetensors b/model-00096-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f69faccac5826988858c4150ec3704868083169 --- /dev/null +++ b/model-00096-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b21b784a08fb66a41626381440b58d9f38a5f210d3f134825f31366a9e471a71 +size 4950860960 diff --git a/model-00097-of-00194.safetensors b/model-00097-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5721cda2e48f1248d7dd70c44b44fdf2bfc167bf --- /dev/null +++ b/model-00097-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d4095943cb7e6590f9d37dc27735fe3b16794f59e10705102e21b6f076be207 +size 4950860960 diff --git a/model-00098-of-00194.safetensors b/model-00098-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00ec02d694fd5bf6ef584f7ec788556ec7ff774b --- /dev/null +++ b/model-00098-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c15b6061fd0f95c1d68d44e7e26b5578e1d20104a89560d7fb599660db0eb6 +size 4950860960 diff --git a/model-00099-of-00194.safetensors b/model-00099-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e64e0625454cf37ef1beffa97cbb7c522bae611b --- /dev/null +++ b/model-00099-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6037aba86c7cc65720b7cf925012286814f28285cdce89bf89c276774dcc0b8e +size 4950861016 diff --git a/model-00100-of-00194.safetensors b/model-00100-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75703f029f1d2351d834d5cf9b7402d6c9f7e6b2 --- /dev/null +++ b/model-00100-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de96f2b1836ccb9ed9edb36bc3a67c24386ce19bbef3c1ec48f28d12a0bf5d96 +size 4938059152 diff --git a/model-00101-of-00194.safetensors b/model-00101-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..553dffe09df2b3847c934a12ce4cffef7dfebe5c --- /dev/null +++ b/model-00101-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df0b797034981ce2bd4b46dadda84a685f1d3387b1d7f576ce7cfa574671169f +size 4950860960 diff --git a/model-00102-of-00194.safetensors b/model-00102-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cae048be06b2490ab1fe74eb34f4686143f705f --- /dev/null +++ b/model-00102-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb6ef2190b147e2a1d807eacb121f0e2fbf64318f333432d216cfc5a16cb5c26 +size 4950860960 diff --git a/model-00103-of-00194.safetensors b/model-00103-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a80fa6a41d17c462bceef5e4824904fdaf0d0c34 --- /dev/null +++ b/model-00103-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa113499df0a56d4884ec6a12d6c72d199115c6b5210d3a18d75b9093d010a2 +size 4950860960 diff --git a/model-00104-of-00194.safetensors b/model-00104-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4eed20ec8313d4edeae461663fd865239b35f81 --- /dev/null +++ b/model-00104-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5000ffcc2aada1047f66ebd357edd7437968bc393c55918c448076ed8b310207 +size 4950860976 diff --git a/model-00105-of-00194.safetensors b/model-00105-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8949c02915fb42143843301a84086b8d49422136 --- /dev/null +++ b/model-00105-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:396dc2db2bc7df8f3b23a39662f1fb54fa654506cefdd21d2e625604a16640b6 +size 4910533896 diff --git a/model-00106-of-00194.safetensors b/model-00106-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88d8e5f4b5aaa048e02224c367bc125b18404f6a --- /dev/null +++ b/model-00106-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b3d0c072d50b01f410be10fa6c2a2024e15b96813d3ff9492213d862696481 +size 4978386248 diff --git a/model-00107-of-00194.safetensors b/model-00107-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17a75957b6ec4dfc0c740c2badae71f9c6319322 --- /dev/null +++ b/model-00107-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be26252b99210ff7df8ffffef902b7888c2c62a5592de190b043d32df0fd176a +size 4950860960 diff --git a/model-00108-of-00194.safetensors b/model-00108-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..baf2a73096e8ad180d1dbe95ada471a75fe0b3ae --- /dev/null +++ b/model-00108-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee42d64aa3355a021c7825792f95a3b766d8cffb33b2623bcb6999f56795c6e +size 4950860960 diff --git a/model-00109-of-00194.safetensors b/model-00109-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..375e10a516cb238cab8a547fa60b6eabc8598391 --- /dev/null +++ b/model-00109-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a87fc11258afae5d36447471f1c77fa39fd897f97a32d739289928ec2e0732b +size 4950860960 diff --git a/model-00110-of-00194.safetensors b/model-00110-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f126ec0b17e7a4ee08d730c8660a231b3236aab --- /dev/null +++ b/model-00110-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dc185ccbb9e98005fcddb8e2ac02e811d2044ce7f0bfd767dbf3c35ccb00755 +size 4950861016 diff --git a/model-00111-of-00194.safetensors b/model-00111-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..309fbca8cc90a6a62bed8eec06ee0180dd5a8782 --- /dev/null +++ b/model-00111-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de7aab612e114c180ce8bac5dae9e6f1fd474d327bd1f11b992788e0ae98f9a +size 4938059152 diff --git a/model-00112-of-00194.safetensors b/model-00112-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37e491ac1ae0fce105724d5f36508fe44a36deea --- /dev/null +++ b/model-00112-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4ce36a07db3905a2dd10b7bcac8c057551e270e9660f3b451864c57cf218d55 +size 4950860960 diff --git a/model-00113-of-00194.safetensors b/model-00113-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6d61df1b4025d7c8d8fd5a42b436bf5d98148da --- /dev/null +++ b/model-00113-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d322edacd06c5f1b9386ae33e58bb5d6f4dc6b41356b05cd2298a3d2a0d0b79 +size 4950860960 diff --git a/model-00114-of-00194.safetensors b/model-00114-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c3697508781cd22bcee6ef1c819912c9a28f768 --- /dev/null +++ b/model-00114-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39ea4dc79d7a1580dff610c9800fbb9dd954b5ea665e60e2227bb006dfa2563a +size 4950860960 diff --git a/model-00115-of-00194.safetensors b/model-00115-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2bc79902ceb0b4ca2c4694d65f863e66bdd7f627 --- /dev/null +++ b/model-00115-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c8ce66ed56ab410d341e2cd619c2c955f1cd35af3a195f7b164e4140b7c344b +size 4950860976 diff --git a/model-00116-of-00194.safetensors b/model-00116-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c93b64051391e26afc8c2338db81b469cc698050 --- /dev/null +++ b/model-00116-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9610abac4e628ac6280478ee2b036e60df3ab310cddf8c2f3bd22373df652ea +size 4914204080 diff --git a/model-00117-of-00194.safetensors b/model-00117-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de6852c9a34ea10abad77c00abb447b81000392e --- /dev/null +++ b/model-00117-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd4240d3095ffb5e752f201085df3a29a6b5fdfd5fdfc44da8ff29242a96bcc +size 4974716072 diff --git a/model-00118-of-00194.safetensors b/model-00118-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f191edf3ab1c573a5cde0068300d4d7ffa68e7a3 --- /dev/null +++ b/model-00118-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:994bf30f4b762f38d7b3c8ae4bea374c20146e52707e03fdc22881589fb9d11d +size 4950860960 diff --git a/model-00119-of-00194.safetensors b/model-00119-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..079869d9322598dc4d7ffd2083a6e71538565e36 --- /dev/null +++ b/model-00119-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d509f62736c3cf0c5983a7b4e8616805e4838c6cc4e0b720147da5a584749dd +size 4950860960 diff --git a/model-00120-of-00194.safetensors b/model-00120-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4bb727a298045479bea781eaabb61fe745e53fd5 --- /dev/null +++ b/model-00120-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a3b936944d3cb7a8e06b93f5c52228e23b686022bfee7348806061104a17c5 +size 4950860960 diff --git a/model-00121-of-00194.safetensors b/model-00121-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61d081e9c1d659ddc11218c8bcb0d2fb3b049d15 --- /dev/null +++ b/model-00121-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c483aa0b3aa43d20acc04b2eae2f9aa61f7348dedd94c0bf1afc31a8324da61 +size 4950861008 diff --git a/model-00122-of-00194.safetensors b/model-00122-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6e18c609c16eac9ef39d63badd1dab937b08fd9 --- /dev/null +++ b/model-00122-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b28d8511e8a4e7a58db7f383571bc144190089d25ff63998efc952bc28d7a9bd +size 4938059160 diff --git a/model-00123-of-00194.safetensors b/model-00123-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30b9185baf32e2d904c47cc043f12a5df23b69b9 --- /dev/null +++ b/model-00123-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7248833d7bb6dadbd69addd25015ce6fd1aa6067f92287b124e050bf53cd7c3 +size 4950860960 diff --git a/model-00124-of-00194.safetensors b/model-00124-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1d6a16b057f97610b8d5d832ea726e244d0ad77 --- /dev/null +++ b/model-00124-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f860ed032f34674592b72669c109b82e48ed4ac8ff999edd60e467f2888bc24b +size 4950860960 diff --git a/model-00125-of-00194.safetensors b/model-00125-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e91d843a6927978b81039ba85c8b70cb8965b9c6 --- /dev/null +++ b/model-00125-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36152ebc4cf5dc1c72fdbee1e63af6d419f1e19870c503848c6f0e3b2c59029b +size 4950860960 diff --git a/model-00126-of-00194.safetensors b/model-00126-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e77441df180e0b4ea3b97295d3db69916ea33009 --- /dev/null +++ b/model-00126-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13632147d3310bec9f88e9447975aca81026005b756f17c34981225fa48f34ea +size 4950860976 diff --git a/model-00127-of-00194.safetensors b/model-00127-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7384f64c689888d840c065fbd8a92466830732b --- /dev/null +++ b/model-00127-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85fcac5eb169fa459077805ce02543e5a4e5f60830b27fdf9dd179aaa33ed0c1 +size 4950861032 diff --git a/model-00128-of-00194.safetensors b/model-00128-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43038e0e78c1dfc296afcf393ca8635beee834d0 --- /dev/null +++ b/model-00128-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd99d2186f8ecd0a770ccd7f3225666329c6205f2d767649c557dce198fb7e06 +size 4938059112 diff --git a/model-00129-of-00194.safetensors b/model-00129-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90272a4247ce8825afd767fd6a78005a3841fb80 --- /dev/null +++ b/model-00129-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edc1079d32d185e84230c4e8b4015fdb2094ac844641ad884cbc0897457070b8 +size 4950860960 diff --git a/model-00130-of-00194.safetensors b/model-00130-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c250e3166b01f1ff0d56df2594555a0b1b1422ee --- /dev/null +++ b/model-00130-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bba8791bdb4d1445605e50e57368055bb2c4b404924e7150277b8bb898c8d351 +size 4950860960 diff --git a/model-00131-of-00194.safetensors b/model-00131-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3ccc7626377b97befcc4e2b3bcdf2ca832b3201 --- /dev/null +++ b/model-00131-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:612b7d608d121e3ff85e783b141ca03ac8f7b74e336f256c42ef337aa811a737 +size 4950860960 diff --git a/model-00132-of-00194.safetensors b/model-00132-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8267f984a6db52b96e40b2a18a3955d3c85640e1 --- /dev/null +++ b/model-00132-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b435be0e08849f8f81a2c878ebe09459100499d03ec0ac17f8a07417c43005 +size 4950861008 diff --git a/model-00133-of-00194.safetensors b/model-00133-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..206e6d40951ccab612cea5e46851aac2e0a6123e --- /dev/null +++ b/model-00133-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea3ae898551a950b51fd798d6ece82d29dfece584064af0f909b34c6f344743a +size 4938059160 diff --git a/model-00134-of-00194.safetensors b/model-00134-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..989f47f593aebf735524425adddf55e0072b052c --- /dev/null +++ b/model-00134-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5cf2dee85c5639c2413af90c2d11d78b28fcd472bd56ace9698fcfbafa990e4 +size 4950860952 diff --git a/model-00135-of-00194.safetensors b/model-00135-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73c8b68bd19b1ba84a408acc8eb28e007db2ec4a --- /dev/null +++ b/model-00135-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41647bcaed7def4d67d23c775b0e6cf9a26a1b40a81b747703340f960ac9e2be +size 4950860960 diff --git a/model-00136-of-00194.safetensors b/model-00136-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29169cacb9f10c32eb5f6e52be80f5820b59b313 --- /dev/null +++ b/model-00136-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d04bbdd71927234d754cfec1decfb1cd1d9687cb86f0a3135486a01e0e34bba +size 4950860960 diff --git a/model-00137-of-00194.safetensors b/model-00137-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2adcbec18a11fa66eaab436858ee3329ecc41d4 --- /dev/null +++ b/model-00137-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:840f78eb13b435384cc1920f89c7d70399411e9716637d32eb33007da04a80ac +size 4950860968 diff --git a/model-00138-of-00194.safetensors b/model-00138-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e03bbdf94197e29a81853e6990ba7a6d4e5d5017 --- /dev/null +++ b/model-00138-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2db58baca1f5e7b0d17ac7c97c3969255475e4d82b06b0607f41eb688fdd6ca5 +size 4950861032 diff --git a/model-00139-of-00194.safetensors b/model-00139-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81c6837aa00182d2dcd5bcc28614686c64605f44 --- /dev/null +++ b/model-00139-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e09a3595d619fa9aab16cd30ab2e8ea74e03656d66e7a965045e2858cf3d6055 +size 4938059120 diff --git a/model-00140-of-00194.safetensors b/model-00140-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79ef8b9e23ead902ca046cf98ef04573cb6688f7 --- /dev/null +++ b/model-00140-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c1e28b5ceb15372d9288186387721fbfaffbd74f8dae9f7e4b8429646307fc0 +size 4950860960 diff --git a/model-00141-of-00194.safetensors b/model-00141-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c99e323eae217db751cb864d3d4543036c864b31 --- /dev/null +++ b/model-00141-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebed7aaa4d9963629b4299e27edee8215b7639a4f550e563b3eba396176c2d28 +size 4950860960 diff --git a/model-00142-of-00194.safetensors b/model-00142-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ccaa730ace231766eb9ace537e1a63563a49b551 --- /dev/null +++ b/model-00142-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c812955c0abe152623d32bbd828eac3fdc4acbf98a14d3677bf0b7943748ef56 +size 4950860960 diff --git a/model-00143-of-00194.safetensors b/model-00143-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b907e31ec2ad5365da595a26ceeb90f8118f4ddb --- /dev/null +++ b/model-00143-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:143aa0e50bf1472dce96e00e3aa2eab034e2a453627e6d43e61fe2df2e1192fb +size 4950861000 diff --git a/model-00144-of-00194.safetensors b/model-00144-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bfe6657e5bec2eada505cfcae41a6a566b29641f --- /dev/null +++ b/model-00144-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f86ab274a885debbe47beac68ffabc212f2ea2da0918f0050e7fdb64895756ff +size 4938059168 diff --git a/model-00145-of-00194.safetensors b/model-00145-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b8fa5593ff5c13ab20b0f4863bc2e914d672f19 --- /dev/null +++ b/model-00145-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ae5ab1c5e724953553e1d395e9844a350093e47151044458a9a67afd9b89e43 +size 4950860952 diff --git a/model-00146-of-00194.safetensors b/model-00146-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca0b3ff10cb8f9ccb131c8371dcea1f1f009eea9 --- /dev/null +++ b/model-00146-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b896ef6b6e17a9bc3755aeb26675e8c47e7ad773f105544fa19d66b67843d3f +size 4950860960 diff --git a/model-00147-of-00194.safetensors b/model-00147-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d91fb0ca3e89cc254ad4a83955895c615a8f806 --- /dev/null +++ b/model-00147-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d124bdffc8643568d438f3146f29b4d9ae626a1d77ee65c6225f61c5ffc7563 +size 4950860960 diff --git a/model-00148-of-00194.safetensors b/model-00148-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61b42be9ebc137811696663ba2e2d48ad108c091 --- /dev/null +++ b/model-00148-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7503384fb0af93c721cab7f882f29a143cd41dcf7dd32b21340492b6cd45ecd0 +size 4950860968 diff --git a/model-00149-of-00194.safetensors b/model-00149-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..750b92d0a10a38986cf8056de71023ed7aa7c436 --- /dev/null +++ b/model-00149-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bf00b0f7de6d2de5cb5a9d99e2b4e9c65fe39e3cfc5c3ac31a4ab483fe3596f +size 4950861032 diff --git a/model-00150-of-00194.safetensors b/model-00150-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b7f27efd6a8d4fcdc883a98da4e32119d9d62cc --- /dev/null +++ b/model-00150-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db9f419822d1b74439cb55f4ee748ee02ce696772a96fad184cd33184af99428 +size 4938059128 diff --git a/model-00151-of-00194.safetensors b/model-00151-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10544e5bb25a2345633d641e6f27df7af7b3faa6 --- /dev/null +++ b/model-00151-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb016b4acd2a071c9df1b6a6f444fae3dea714540ce58f0f7d0444c71e2678d +size 4950860960 diff --git a/model-00152-of-00194.safetensors b/model-00152-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2c6a67c6a80b1d4a7c05985a2dd9d8ad73f059e --- /dev/null +++ b/model-00152-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c558749eef74d19d358cc7123cc3eec4ee925c211102802b256108b79acd1830 +size 4950860960 diff --git a/model-00153-of-00194.safetensors b/model-00153-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea47be913e061051f66188b6f9ecf900c1a09e47 --- /dev/null +++ b/model-00153-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb040ed3e417cc7f342eb650087044ed72e4dee02b4748f49b2a26abf9e5f084 +size 4950860960 diff --git a/model-00154-of-00194.safetensors b/model-00154-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf5dad14e7d06b699585ef6eab88c29df452cd0c --- /dev/null +++ b/model-00154-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31fbbfd31009d299009e8fe275f81255155790890214d97b0d5a9cb559d194bd +size 4950861000 diff --git a/model-00155-of-00194.safetensors b/model-00155-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d938d68d329644a77ab08f9fccab899111e1b325 --- /dev/null +++ b/model-00155-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9fcd84c5b3472ac91e505417486e0f5307580d2786f7b70b478350ff896ad2 +size 4938059176 diff --git a/model-00156-of-00194.safetensors b/model-00156-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d07c4151b788ea52951998b20bc1211e56284e69 --- /dev/null +++ b/model-00156-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:829f4805812e5db0d4001f13be6d429021400577262350e7c3ac337a971ba054 +size 4950860952 diff --git a/model-00157-of-00194.safetensors b/model-00157-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73a04e69e18d28acd59cabea55bccdaa8a3c17a9 --- /dev/null +++ b/model-00157-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d8a35b5fc36b3e3842ba1d2d2618187bba9a90fa05f827b137b649a15125be +size 4950860960 diff --git a/model-00158-of-00194.safetensors b/model-00158-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf153f740ea22f194d86324187a3f96ce1dbb117 --- /dev/null +++ b/model-00158-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8678794b10053f43f756950dbbde36faf9c4ef621400204f875f1ca36a2399d +size 4950860960 diff --git a/model-00159-of-00194.safetensors b/model-00159-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0ca3e301aba24c37afd21523fa7b50e0b13c670 --- /dev/null +++ b/model-00159-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:536fe439547a0e32e01c45f6cc63aa6ca86a15cf451a91635dd19a7539255b34 +size 4950860960 diff --git a/model-00160-of-00194.safetensors b/model-00160-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..400e7a2cf40b10cac0cc72d42397a69e2ca1b569 --- /dev/null +++ b/model-00160-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7ec062f8365c2f061919599743921093bf5c571d0c7d631c7e81ea233f3e6d +size 4950861032 diff --git a/model-00161-of-00194.safetensors b/model-00161-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..623cc7c98ed92393c913407e2b26358dc4c5105d --- /dev/null +++ b/model-00161-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6a75393579bb94a2b99074e00a9164ef488a5b827d8f711436f985901eb837b +size 4938059128 diff --git a/model-00162-of-00194.safetensors b/model-00162-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f8f6bed4fc5e2881420c261ef89483f1e47a0a6 --- /dev/null +++ b/model-00162-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:844a8977484567cd65d39dcf45252f5e18bcd5dd364ba66db2b2944d595ea9f2 +size 4950860960 diff --git a/model-00163-of-00194.safetensors b/model-00163-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee03319f92285517634a13903d7eed37d15edaaa --- /dev/null +++ b/model-00163-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f236c3b4a076e27de85a9acc6f56da769e2700c813b17ccf792334317bb8bc +size 4950860960 diff --git a/model-00164-of-00194.safetensors b/model-00164-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60a9a7fece6aa42416b963c0f8f861b87cbb5ddc --- /dev/null +++ b/model-00164-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1cd6393a2a1e5f4213d86495876d1953e997d84ef9f2c3b51c71e6d71f4bb66 +size 4950860960 diff --git a/model-00165-of-00194.safetensors b/model-00165-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15dd6241d5511965904044d259c78c018d3da0d5 --- /dev/null +++ b/model-00165-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2b8a87a921411dd50de6fa5edfc5e3ac52a10fea8caca6b24ee3d517fe14d5 +size 4950861000 diff --git a/model-00166-of-00194.safetensors b/model-00166-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfb1cfd1140ca910911b1e74d25c91b03a5d6d1c --- /dev/null +++ b/model-00166-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d45af965ec30809863dff0d6055efd9d2f1366882d1c2f2d6af59af4d6361d +size 4938059184 diff --git a/model-00167-of-00194.safetensors b/model-00167-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..502c011b1e109a563a93f0a78efde6884d7f4cc8 --- /dev/null +++ b/model-00167-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bd62964543e2fe5452847300564b722b32dc5c5d52ad3f133c21f57823017ea +size 4950860944 diff --git a/model-00168-of-00194.safetensors b/model-00168-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c0fa40d1ce6d30ca21dab7e8fe93a90f27b892e --- /dev/null +++ b/model-00168-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75eb80c101963dd89527288ff38aba41bdae3cd1b785103a04ada1fbdb5ef984 +size 4950860960 diff --git a/model-00169-of-00194.safetensors b/model-00169-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8d3139543c21c6dfcfecbd2408b106114b90566 --- /dev/null +++ b/model-00169-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b634df8fd99bfb6e1463ecebf8ede1c74e79430b1ddb509a69223b063fdcad55 +size 4950860960 diff --git a/model-00170-of-00194.safetensors b/model-00170-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e70ec65c36716c5a60dd53602672773ec6cb8d55 --- /dev/null +++ b/model-00170-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a18107824cd3b037a37168855979c434bd916a22b7abc99b9cc7485e7b147a +size 4950860960 diff --git a/model-00171-of-00194.safetensors b/model-00171-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38ad770e87aa07f86c4311cacf18d4a54111d755 --- /dev/null +++ b/model-00171-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e5095c729263cc0802bb78338953661725f8836ab01fe6e3fb7f7c7ff5f0354 +size 4950861032 diff --git a/model-00172-of-00194.safetensors b/model-00172-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47bad83befbc21709da0a788d3d8d3dbf323de99 --- /dev/null +++ b/model-00172-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:487723af0214ca7f891d9f6f38d5502fa48c8b0a37fe7f9f7d6b16f8faca2374 +size 4938059128 diff --git a/model-00173-of-00194.safetensors b/model-00173-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e0f93838171ec5898b713a96ae68fad5576bea9 --- /dev/null +++ b/model-00173-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6405df25b6ae02846df15ece7156aafc64f0952c832955f9513742f72ba50c6 +size 4950860960 diff --git a/model-00174-of-00194.safetensors b/model-00174-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b5390416550b257dce23ed97c1f84d4c989a91e --- /dev/null +++ b/model-00174-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc54a2ae41edfbe52a61f188d2c339f6bf0bd28f526b0041869c83c5a527bb1f +size 4950860960 diff --git a/model-00175-of-00194.safetensors b/model-00175-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..750bb700567bcdeb30fb1a0de7c7259eaaed8c39 --- /dev/null +++ b/model-00175-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bb691c1a963814a95235db65403f29ff7fa9584e89ead4c97e272ec4958d798 +size 4950860960 diff --git a/model-00176-of-00194.safetensors b/model-00176-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e3e10fb5e321e72de2e068dd5b096716addf60d --- /dev/null +++ b/model-00176-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59780a7e651146e186e8c820722083cf80812eb2858968fc3d48be1fabff609c +size 4950860992 diff --git a/model-00177-of-00194.safetensors b/model-00177-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c12422ab69ff96dc67154216b8eb9ea4ea6b271 --- /dev/null +++ b/model-00177-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d99757b5f18b68b8345e38ec8ee50d34c7c7ba04166d05b6ae6781353b6571 +size 4938059184 diff --git a/model-00178-of-00194.safetensors b/model-00178-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c962390b5af6517174f44b20794e8ddac0ab5fa2 --- /dev/null +++ b/model-00178-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cd080f5a37ecaa04f5be10dc2c12ed2f6ee0f15bed682933189c20493c2d2a +size 4950860944 diff --git a/model-00179-of-00194.safetensors b/model-00179-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ec0ce55c8ad1c250abb380ce606822e18878e9c --- /dev/null +++ b/model-00179-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d933ce273a9be8afc4c5ae6e6d6b996bcddd2d57ab8e9323eccc5b936336c46f +size 4950860960 diff --git a/model-00180-of-00194.safetensors b/model-00180-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c5ed93fad52e3f590beffb65c7fd9065475b8c6c --- /dev/null +++ b/model-00180-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff646c1948578b818c5530088f8fd958a6ccea27a7a6d76bfb3a6727942d7bcd +size 4950860960 diff --git a/model-00181-of-00194.safetensors b/model-00181-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cc31ca55a0d565fcc59e8757f4d55a6d2113dbb --- /dev/null +++ b/model-00181-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2773b8e700498c35610207cc7d4fa42e9b3d8b71776ecdaacdacb48cef29b133 +size 4950860960 diff --git a/model-00182-of-00194.safetensors b/model-00182-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92bf14bdaa645619fff9dd62d6ba0f88c94228a0 --- /dev/null +++ b/model-00182-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0f1c85206fdf71d81bf489af719a04836163cfc79fe65fc26679fa13b6e2d0 +size 4950861032 diff --git a/model-00183-of-00194.safetensors b/model-00183-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cce34d9b01bc522bf311e7ef21f4869ce91d72d --- /dev/null +++ b/model-00183-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d27efb36493ede2e26fbfc54012709cd8a8c3c71bb9f94cda762e4fdd4a0545b +size 4938059136 diff --git a/model-00184-of-00194.safetensors b/model-00184-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6eba8fb2097ae8a6fb0803204c3030862d2272f3 --- /dev/null +++ b/model-00184-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8efba6d80ce548055af5883e5faced9685268a25bdb370a6855c7b2420132b1 +size 4950860960 diff --git a/model-00185-of-00194.safetensors b/model-00185-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04294c26308187cd26e0ccb4474bc1ecb12f02bc --- /dev/null +++ b/model-00185-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec3310e0781d672ab78d8e102b426a2838decfdcd7338654eea66c8d15f33d6 +size 4950860960 diff --git a/model-00186-of-00194.safetensors b/model-00186-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89cd460d0994afbd48ac87162288aac64e632f10 --- /dev/null +++ b/model-00186-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05322b8e0fdd8f63377a1f5b85709bb951d1684d2ee93400ac944319ed4deadc +size 4950860960 diff --git a/model-00187-of-00194.safetensors b/model-00187-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c2538e98b2d82b90f5be354d273fee3abd54630 --- /dev/null +++ b/model-00187-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b85eb41ca69310cb60579b52f09c410ace18ffdb8eff9cda4cc2f63fe00efb96 +size 4950860992 diff --git a/model-00188-of-00194.safetensors b/model-00188-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..188565af4ea8204520c58702c3e94a4bc0d451bf --- /dev/null +++ b/model-00188-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8951b14663499bd3e814d6456bb11887122efa5704e16be3790feb991f23ca9 +size 4938059192 diff --git a/model-00189-of-00194.safetensors b/model-00189-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07444ebcc99ccfe9d891e232914741095a685c11 --- /dev/null +++ b/model-00189-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93acd226161b0279599618c5c18328871ad5c6c0f3e17736ecee6e9f440d89a9 +size 4950860944 diff --git a/model-00190-of-00194.safetensors b/model-00190-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51ea71584bbb28878bec31e4f9be5249132987d4 --- /dev/null +++ b/model-00190-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5bc93e9e9ac77cd85ca082aeecb3245ecc067590cc97f13aa51e86fb6c418d +size 4950860960 diff --git a/model-00191-of-00194.safetensors b/model-00191-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b92cfacbbf41c97b69810e2f2a636355c945345b --- /dev/null +++ b/model-00191-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a99217216e5346fc339b13088d26374b571a3035d103586cf58c8214bd101a +size 4950860960 diff --git a/model-00192-of-00194.safetensors b/model-00192-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4309f8b7f93b370a5aaadff3723a16f5da662ebd --- /dev/null +++ b/model-00192-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97802e9ac02d08ad5f83217db9d44f71687a5c9be57eddc07ad8cbf77e2d8e29 +size 4950860960 diff --git a/model-00193-of-00194.safetensors b/model-00193-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df7f5996fa1b7dff2f84a8f59e01707619976c4d --- /dev/null +++ b/model-00193-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c978e0db4f866030aeaf5b98b26431fe83972a6787667e8bec605857dcd86d2a +size 4950861024 diff --git a/model-00194-of-00194.safetensors b/model-00194-of-00194.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..652595d12a62c5a3e4e7d5106c2d38f7939b2c33 --- /dev/null +++ b/model-00194-of-00194.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fb754c88ecb085691fbdad22854e63caf2b4aebc776812e0169ef77da553f2e +size 2091969896 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index 6f59e65ba117954e5d8c011d5ca07fc3a22d66a5..c1f7f106a76b1a5e4519eb42718295a872e8a4d9 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,13835 +1,13835 @@ { "metadata": { - "total_size": 963334844416 + "total_size": 957169217536 }, "weight_map": { - "lm_head.weight": "model-00195-of-00195.safetensors", - "model.embed_tokens.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.100.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.100.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.100.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.101.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.101.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.101.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.102.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.102.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.102.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.103.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.103.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.103.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.104.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.104.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.104.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.105.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.105.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.105.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.106.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.106.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.106.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.107.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.107.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.107.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.108.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.108.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.108.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.109.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.109.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.109.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.110.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.110.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.110.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.111.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.111.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.111.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.112.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.112.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.112.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.113.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.113.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.113.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.114.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.114.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.114.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.115.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.115.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.115.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.116.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.116.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.116.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.117.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.117.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.117.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.118.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.118.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.118.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.119.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.119.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.119.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.120.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.120.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.120.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.121.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.121.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.121.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.122.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.122.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.122.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.123.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.123.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.123.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.124.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.124.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.124.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.125.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.125.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.125.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.126.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.126.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.126.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.127.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.127.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.127.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.16.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.16.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.16.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.17.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.17.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.17.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.18.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.18.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.18.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.19.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.19.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.19.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.20.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.24.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.24.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.24.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.25.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.25.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.25.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.26.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.26.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.26.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.27.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.27.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.27.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.28.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.28.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.28.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.29.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.29.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.29.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.30.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.30.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.30.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.31.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.31.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.31.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.32.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.32.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.32.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.33.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.33.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.33.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.34.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.34.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.34.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.35.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.35.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.35.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.36.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.36.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.36.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.37.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.37.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.37.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.38.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.38.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.38.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.39.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.39.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.39.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.40.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.40.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.40.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.41.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.41.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.41.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.42.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.42.w2.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.42.w3.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.43.w1.weight": "model-00002-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.43.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.43.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.44.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.44.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.44.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.45.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.45.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.45.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.46.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.46.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.46.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.47.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.47.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.47.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.48.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.48.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.48.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.49.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.49.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.49.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.50.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.50.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.50.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.51.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.51.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.51.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.52.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.52.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.52.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.53.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.53.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.53.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.54.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.54.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.54.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.55.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.55.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.55.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.56.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.56.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.56.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.57.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.57.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.57.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.58.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.58.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.58.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.59.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.59.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.59.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.60.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.60.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.60.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.61.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.61.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.61.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.62.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.62.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.62.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.63.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.63.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.63.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.64.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.64.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.64.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.65.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.65.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.65.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.66.w1.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.66.w2.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.66.w3.weight": "model-00003-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.67.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.67.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.67.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.68.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.68.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.68.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.69.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.69.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.69.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.70.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.70.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.70.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.71.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.71.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.71.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.72.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.72.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.72.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.73.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.73.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.73.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.74.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.74.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.74.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.75.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.75.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.75.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.76.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.76.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.76.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.77.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.77.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.77.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.78.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.78.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.78.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.79.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.79.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.79.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.80.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.80.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.80.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.81.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.81.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.81.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.82.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.82.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.82.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.83.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.83.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.83.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.84.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.84.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.84.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.85.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.85.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.85.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.86.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.86.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.86.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.87.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.87.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.87.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.88.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.88.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.88.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.89.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.89.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.89.w3.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.90.w1.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.90.w2.weight": "model-00004-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.90.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.91.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.91.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.91.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.92.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.92.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.92.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.93.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.93.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.93.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.94.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.94.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.94.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.95.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.95.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.95.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.96.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.96.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.96.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.97.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.97.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.97.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.98.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.98.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.98.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.99.w1.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.99.w2.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.experts.99.w3.weight": "model-00005-of-00195.safetensors", - "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00195.safetensors", - "model.layers.0.input_layernorm.weight": "model-00006-of-00195.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00006-of-00195.safetensors", - "model.layers.0.residual_layernorm.weight": "model-00006-of-00195.safetensors", - "model.layers.0.residual_mlp.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.0.residual_mlp.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.0.residual_mlp.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00195.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00195.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00195.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.10.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.10.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.10.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.100.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.100.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.100.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.101.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.101.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.101.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.102.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.102.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.102.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.103.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.103.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.103.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.104.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.104.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.104.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.105.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.105.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.105.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.106.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.106.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.106.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.107.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.107.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.107.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.108.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.108.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.108.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.109.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.109.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.109.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.11.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.11.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.11.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.110.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.110.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.110.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.111.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.111.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.111.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.112.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.112.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.112.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.113.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.113.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.113.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.114.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.114.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.114.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.115.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.115.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.115.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.116.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.116.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.116.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.117.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.117.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.117.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.118.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.118.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.118.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.119.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.119.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.119.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.12.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.12.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.12.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.120.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.120.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.120.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.121.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.121.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.121.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.122.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.122.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.122.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.123.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.123.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.123.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.124.w1.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.124.w2.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.124.w3.weight": "model-00011-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.125.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.125.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.125.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.126.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.126.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.126.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.127.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.127.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.127.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.13.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.13.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.13.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.14.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.14.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.14.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.15.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.15.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.15.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.16.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.16.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.16.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.17.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.17.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.17.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.18.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.18.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.18.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.19.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.19.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.19.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.20.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.20.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.20.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.21.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.21.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.21.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.22.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.22.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.22.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.23.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.23.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.23.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.24.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.24.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.24.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.25.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.25.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.25.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.26.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.26.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.26.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.27.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.27.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.27.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.28.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.28.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.28.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.29.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.29.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.29.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.30.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.30.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.30.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.31.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.31.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.31.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.32.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.32.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.32.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.33.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.33.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.33.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.34.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.34.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.34.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.35.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.35.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.35.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.36.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.36.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.36.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.37.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.37.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.37.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.38.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.38.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.38.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.39.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.39.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.39.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.48.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.48.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.48.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.49.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.49.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.49.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.50.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.50.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.50.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.51.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.51.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.51.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.52.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.52.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.52.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.53.w1.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.53.w2.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.53.w3.weight": "model-00008-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.56.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.56.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.56.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.57.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.57.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.57.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.58.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.58.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.58.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.59.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.59.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.59.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.60.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.60.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.60.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.61.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.61.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.61.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.62.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.62.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.62.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.63.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.63.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.63.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.64.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.64.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.64.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.65.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.65.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.65.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.66.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.66.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.66.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.67.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.67.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.67.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.68.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.68.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.68.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.69.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.69.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.69.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.70.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.70.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.70.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.71.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.71.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.71.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.72.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.72.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.72.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.73.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.73.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.73.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.74.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.74.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.74.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.75.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.75.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.75.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.76.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.76.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.76.w3.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.77.w1.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.77.w2.weight": "model-00009-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.77.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.78.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.78.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.78.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.79.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.79.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.79.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.8.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.8.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.8.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.80.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.80.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.80.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.81.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.81.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.81.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.82.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.82.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.82.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.83.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.83.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.83.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.84.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.84.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.84.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.85.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.85.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.85.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.86.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.86.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.86.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.87.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.87.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.87.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.88.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.88.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.88.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.89.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.89.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.89.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.9.w1.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.9.w2.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.9.w3.weight": "model-00007-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.90.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.90.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.90.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.91.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.91.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.91.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.92.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.92.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.92.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.93.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.93.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.93.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.94.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.94.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.94.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.95.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.95.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.95.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.96.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.96.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.96.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.97.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.97.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.97.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.98.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.98.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.98.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.99.w1.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.99.w2.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.experts.99.w3.weight": "model-00010-of-00195.safetensors", - "model.layers.1.block_sparse_moe.gate.weight": "model-00006-of-00195.safetensors", - "model.layers.1.input_layernorm.weight": "model-00012-of-00195.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00012-of-00195.safetensors", - "model.layers.1.residual_layernorm.weight": "model-00012-of-00195.safetensors", - "model.layers.1.residual_mlp.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.1.residual_mlp.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.1.residual_mlp.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00006-of-00195.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00006-of-00195.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00006-of-00195.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00006-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.10.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.10.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.10.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.100.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.100.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.100.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.101.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.101.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.101.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.102.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.102.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.102.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.103.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.103.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.103.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.104.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.104.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.104.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.105.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.105.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.105.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.106.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.106.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.106.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.107.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.107.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.107.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.108.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.108.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.108.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.109.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.109.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.109.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.11.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.11.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.11.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.110.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.110.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.110.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.111.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.111.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.111.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.112.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.112.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.112.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.113.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.113.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.113.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.114.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.114.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.114.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.115.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.115.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.115.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.116.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.116.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.116.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.117.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.117.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.117.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.118.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.118.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.118.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.119.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.119.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.119.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.12.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.12.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.12.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.120.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.120.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.120.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.121.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.121.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.121.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.122.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.122.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.122.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.123.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.123.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.123.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.124.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.124.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.124.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.125.w1.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.125.w2.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.125.w3.weight": "model-00061-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.126.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.126.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.126.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.127.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.127.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.127.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.13.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.13.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.13.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.14.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.14.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.14.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.15.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.15.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.15.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.16.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.16.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.16.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.17.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.17.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.17.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.18.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.18.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.18.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.19.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.19.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.19.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.20.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.20.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.20.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.21.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.21.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.21.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.22.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.22.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.22.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.23.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.23.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.23.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.24.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.24.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.24.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.25.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.25.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.25.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.26.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.26.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.26.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.27.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.27.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.27.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.28.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.28.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.28.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.29.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.29.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.29.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.30.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.30.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.30.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.31.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.31.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.31.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.32.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.32.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.32.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.33.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.33.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.33.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.34.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.34.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.34.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.35.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.35.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.35.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.36.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.36.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.36.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.37.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.37.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.37.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.38.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.38.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.38.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.39.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.39.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.39.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.40.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.40.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.40.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.41.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.41.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.41.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.42.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.42.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.42.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.43.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.43.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.43.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.44.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.44.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.44.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.45.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.45.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.45.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.46.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.46.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.46.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.47.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.47.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.47.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.48.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.48.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.48.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.49.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.49.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.49.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.50.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.50.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.50.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.51.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.51.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.51.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.52.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.52.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.52.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.53.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.53.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.53.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.54.w1.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.54.w2.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.54.w3.weight": "model-00058-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.55.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.55.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.55.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.56.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.56.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.56.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.57.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.57.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.57.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.58.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.58.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.58.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.59.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.59.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.59.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.60.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.60.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.60.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.61.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.61.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.61.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.62.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.62.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.62.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.63.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.63.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.63.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.64.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.64.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.64.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.65.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.65.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.65.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.66.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.66.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.66.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.67.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.67.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.67.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.68.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.68.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.68.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.69.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.69.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.69.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.70.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.70.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.70.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.71.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.71.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.71.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.72.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.72.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.72.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.73.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.73.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.73.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.74.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.74.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.74.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.75.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.75.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.75.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.76.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.76.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.76.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.77.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.77.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.77.w3.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.78.w1.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.78.w2.weight": "model-00059-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.78.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.79.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.79.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.79.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.8.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.8.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.8.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.80.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.80.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.80.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.81.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.81.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.81.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.82.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.82.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.82.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.83.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.83.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.83.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.84.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.84.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.84.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.85.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.85.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.85.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.86.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.86.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.86.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.87.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.87.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.87.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.88.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.88.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.88.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.89.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.89.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.89.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.9.w1.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.9.w2.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.9.w3.weight": "model-00057-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.90.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.90.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.90.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.91.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.91.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.91.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.92.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.92.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.92.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.93.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.93.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.93.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.94.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.94.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.94.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.95.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.95.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.95.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.96.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.96.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.96.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.97.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.97.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.97.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.98.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.98.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.98.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.99.w1.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.99.w2.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.experts.99.w3.weight": "model-00060-of-00195.safetensors", - "model.layers.10.block_sparse_moe.gate.weight": "model-00056-of-00195.safetensors", - "model.layers.10.input_layernorm.weight": "model-00062-of-00195.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00062-of-00195.safetensors", - "model.layers.10.residual_layernorm.weight": "model-00062-of-00195.safetensors", - "model.layers.10.residual_mlp.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.10.residual_mlp.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.10.residual_mlp.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00056-of-00195.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00056-of-00195.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00056-of-00195.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00056-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.10.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.10.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.10.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.100.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.100.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.100.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.101.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.101.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.101.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.102.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.102.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.102.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.103.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.103.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.103.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.104.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.104.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.104.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.105.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.105.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.105.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.106.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.106.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.106.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.107.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.107.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.107.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.108.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.108.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.108.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.109.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.109.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.109.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.11.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.11.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.11.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.110.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.110.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.110.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.111.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.111.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.111.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.112.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.112.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.112.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.113.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.113.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.113.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.114.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.114.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.114.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.115.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.115.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.115.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.116.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.116.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.116.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.117.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.117.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.117.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.118.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.118.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.118.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.119.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.119.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.119.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.12.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.12.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.12.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.120.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.120.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.120.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.121.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.121.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.121.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.122.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.122.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.122.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.123.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.123.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.123.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.124.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.124.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.124.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.125.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.125.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.125.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.126.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.126.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.126.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.127.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.127.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.127.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.13.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.13.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.13.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.14.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.14.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.14.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.15.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.15.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.15.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.16.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.16.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.16.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.17.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.17.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.17.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.18.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.18.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.18.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.19.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.19.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.19.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.20.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.20.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.20.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.21.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.21.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.21.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.22.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.22.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.22.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.23.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.23.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.23.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.24.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.24.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.24.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.25.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.25.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.25.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.26.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.26.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.26.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.27.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.27.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.27.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.28.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.28.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.28.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.29.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.29.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.29.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.30.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.30.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.30.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.31.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.31.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.31.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.32.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.32.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.32.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.33.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.33.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.33.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.34.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.34.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.34.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.35.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.35.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.35.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.36.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.36.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.36.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.37.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.37.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.37.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.38.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.38.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.38.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.39.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.39.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.39.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.40.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.40.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.40.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.41.w1.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.41.w2.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.41.w3.weight": "model-00063-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.42.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.42.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.42.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.43.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.43.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.43.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.44.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.44.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.44.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.45.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.45.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.45.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.46.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.46.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.46.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.47.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.47.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.47.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.48.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.48.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.48.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.49.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.49.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.49.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.50.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.50.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.50.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.51.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.51.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.51.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.52.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.52.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.52.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.53.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.53.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.53.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.54.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.54.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.54.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.55.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.55.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.55.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.56.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.56.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.56.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.57.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.57.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.57.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.58.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.58.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.58.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.59.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.59.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.59.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.60.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.60.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.60.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.61.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.61.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.61.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.62.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.62.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.62.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.63.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.63.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.63.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.64.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.64.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.64.w3.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.65.w1.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.65.w2.weight": "model-00064-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.65.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.66.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.66.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.66.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.67.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.67.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.67.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.68.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.68.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.68.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.69.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.69.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.69.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.70.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.70.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.70.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.71.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.71.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.71.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.72.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.72.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.72.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.73.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.73.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.73.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.74.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.74.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.74.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.75.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.75.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.75.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.76.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.76.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.76.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.77.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.77.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.77.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.78.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.78.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.78.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.79.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.79.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.79.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.8.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.8.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.8.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.80.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.80.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.80.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.81.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.81.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.81.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.82.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.82.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.82.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.83.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.83.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.83.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.84.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.84.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.84.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.85.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.85.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.85.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.86.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.86.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.86.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.87.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.87.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.87.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.88.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.88.w2.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.88.w3.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.89.w1.weight": "model-00065-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.89.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.89.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.9.w1.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.9.w2.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.9.w3.weight": "model-00062-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.90.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.90.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.90.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.91.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.91.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.91.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.92.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.92.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.92.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.93.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.93.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.93.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.94.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.94.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.94.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.95.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.95.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.95.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.96.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.96.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.96.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.97.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.97.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.97.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.98.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.98.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.98.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.99.w1.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.99.w2.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.experts.99.w3.weight": "model-00066-of-00195.safetensors", - "model.layers.11.block_sparse_moe.gate.weight": "model-00062-of-00195.safetensors", - "model.layers.11.input_layernorm.weight": "model-00067-of-00195.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00067-of-00195.safetensors", - "model.layers.11.residual_layernorm.weight": "model-00067-of-00195.safetensors", - "model.layers.11.residual_mlp.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.11.residual_mlp.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.11.residual_mlp.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00062-of-00195.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00062-of-00195.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00062-of-00195.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00062-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.10.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.10.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.10.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.100.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.100.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.100.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.101.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.101.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.101.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.102.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.102.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.102.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.103.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.103.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.103.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.104.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.104.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.104.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.105.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.105.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.105.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.106.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.106.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.106.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.107.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.107.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.107.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.108.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.108.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.108.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.109.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.109.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.109.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.11.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.11.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.11.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.110.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.110.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.110.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.111.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.111.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.111.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.112.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.112.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.112.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.113.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.113.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.113.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.114.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.114.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.114.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.115.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.115.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.115.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.116.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.116.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.116.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.117.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.117.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.117.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.118.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.118.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.118.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.119.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.119.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.119.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.12.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.12.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.12.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.120.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.120.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.120.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.121.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.121.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.121.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.122.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.122.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.122.w3.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.123.w1.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.123.w2.weight": "model-00072-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.123.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.124.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.124.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.124.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.125.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.125.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.125.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.126.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.126.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.126.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.127.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.127.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.127.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.13.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.13.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.13.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.14.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.14.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.14.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.15.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.15.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.15.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.16.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.16.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.16.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.17.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.17.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.17.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.18.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.18.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.18.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.19.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.19.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.19.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.20.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.20.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.20.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.21.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.21.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.21.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.22.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.22.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.22.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.23.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.23.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.23.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.24.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.24.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.24.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.25.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.25.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.25.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.26.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.26.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.26.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.27.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.27.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.27.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.28.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.28.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.28.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.29.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.29.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.29.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.30.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.30.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.30.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.31.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.31.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.31.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.32.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.32.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.32.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.33.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.33.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.33.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.34.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.34.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.34.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.35.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.35.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.35.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.36.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.36.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.36.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.37.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.37.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.37.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.38.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.38.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.38.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.39.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.39.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.39.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.40.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.40.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.40.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.41.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.41.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.41.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.42.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.42.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.42.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.43.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.43.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.43.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.44.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.44.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.44.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.45.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.45.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.45.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.46.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.46.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.46.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.47.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.47.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.47.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.48.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.48.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.48.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.49.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.49.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.49.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00067-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.50.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.50.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.50.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.51.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.51.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.51.w3.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.52.w1.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.52.w2.weight": "model-00069-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.52.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.53.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.53.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.53.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.54.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.54.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.54.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.55.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.55.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.55.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.56.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.56.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.56.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.57.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.57.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.57.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.58.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.58.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.58.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.59.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.59.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.59.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.60.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.60.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.60.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.61.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.61.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.61.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.62.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.62.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.62.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.63.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.63.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.63.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.64.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.64.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.64.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.65.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.65.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.65.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.66.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.66.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.66.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.67.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.67.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.67.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.68.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.68.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.68.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.69.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.69.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.69.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.70.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.70.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.70.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.71.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.71.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.71.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.72.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.72.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.72.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.73.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.73.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.73.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.74.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.74.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.74.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.75.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.75.w2.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.75.w3.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.76.w1.weight": "model-00070-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.76.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.76.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.77.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.77.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.77.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.78.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.78.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.78.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.79.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.79.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.79.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.8.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.8.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.8.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.80.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.80.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.80.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.81.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.81.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.81.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.82.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.82.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.82.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.83.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.83.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.83.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.84.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.84.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.84.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.85.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.85.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.85.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.86.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.86.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.86.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.87.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.87.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.87.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.88.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.88.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.88.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.89.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.89.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.89.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.9.w1.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.9.w2.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.9.w3.weight": "model-00068-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.90.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.90.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.90.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.91.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.91.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.91.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.92.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.92.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.92.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.93.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.93.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.93.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.94.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.94.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.94.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.95.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.95.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.95.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.96.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.96.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.96.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.97.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.97.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.97.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.98.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.98.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.98.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.99.w1.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.99.w2.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.experts.99.w3.weight": "model-00071-of-00195.safetensors", - "model.layers.12.block_sparse_moe.gate.weight": "model-00067-of-00195.safetensors", - "model.layers.12.input_layernorm.weight": "model-00073-of-00195.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00073-of-00195.safetensors", - "model.layers.12.residual_layernorm.weight": "model-00073-of-00195.safetensors", - "model.layers.12.residual_mlp.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.12.residual_mlp.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.12.residual_mlp.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00067-of-00195.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00067-of-00195.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00067-of-00195.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00067-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.10.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.10.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.10.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.100.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.100.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.100.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.101.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.101.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.101.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.102.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.102.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.102.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.103.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.103.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.103.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.104.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.104.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.104.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.105.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.105.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.105.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.106.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.106.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.106.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.107.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.107.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.107.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.108.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.108.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.108.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.109.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.109.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.109.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.11.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.11.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.11.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.110.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.110.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.110.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.111.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.111.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.111.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.112.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.112.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.112.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.113.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.113.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.113.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.114.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.114.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.114.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.115.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.115.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.115.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.116.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.116.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.116.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.117.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.117.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.117.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.118.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.118.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.118.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.119.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.119.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.119.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.12.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.12.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.12.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.120.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.120.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.120.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.121.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.121.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.121.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.122.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.122.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.122.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.123.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.123.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.123.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.124.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.124.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.124.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.125.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.125.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.125.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.126.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.126.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.126.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.127.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.127.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.127.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.13.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.13.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.13.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.14.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.14.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.14.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.15.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.15.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.15.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.16.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.16.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.16.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.17.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.17.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.17.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.18.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.18.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.18.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.19.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.19.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.19.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.20.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.20.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.20.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.21.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.21.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.21.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.22.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.22.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.22.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.23.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.23.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.23.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.24.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.24.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.24.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.25.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.25.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.25.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.26.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.26.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.26.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.27.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.27.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.27.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.28.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.28.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.28.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.29.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.29.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.29.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.30.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.30.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.30.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.31.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.31.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.31.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.32.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.32.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.32.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.33.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.33.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.33.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.34.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.34.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.34.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.35.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.35.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.35.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.36.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.36.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.36.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.37.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.37.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.37.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.38.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.38.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.38.w3.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.39.w1.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.39.w2.weight": "model-00074-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.39.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.40.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.40.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.40.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.41.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.41.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.41.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.42.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.42.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.42.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.43.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.43.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.43.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.44.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.44.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.44.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.45.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.45.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.45.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.46.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.46.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.46.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.47.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.47.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.47.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.48.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.48.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.48.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.49.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.49.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.49.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.50.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.50.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.50.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.51.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.51.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.51.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.52.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.52.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.52.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.53.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.53.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.53.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.54.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.54.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.54.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.55.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.55.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.55.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.56.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.56.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.56.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.57.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.57.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.57.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.58.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.58.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.58.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.59.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.59.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.59.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.60.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.60.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.60.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.61.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.61.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.61.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.62.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.62.w2.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.62.w3.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.63.w1.weight": "model-00075-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.63.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.63.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.64.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.64.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.64.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.65.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.65.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.65.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.66.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.66.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.66.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.67.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.67.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.67.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.68.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.68.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.68.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.69.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.69.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.69.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.70.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.70.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.70.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.71.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.71.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.71.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.72.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.72.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.72.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.73.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.73.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.73.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.74.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.74.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.74.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.75.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.75.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.75.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.76.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.76.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.76.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.77.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.77.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.77.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.78.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.78.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.78.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.79.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.79.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.79.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.8.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.8.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.8.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.80.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.80.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.80.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.81.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.81.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.81.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.82.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.82.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.82.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.83.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.83.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.83.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.84.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.84.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.84.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.85.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.85.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.85.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.86.w1.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.86.w2.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.86.w3.weight": "model-00076-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.87.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.87.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.87.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.88.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.88.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.88.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.89.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.89.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.89.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.9.w1.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.9.w2.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.9.w3.weight": "model-00073-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.90.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.90.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.90.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.91.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.91.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.91.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.92.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.92.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.92.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.93.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.93.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.93.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.94.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.94.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.94.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.95.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.95.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.95.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.96.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.96.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.96.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.97.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.97.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.97.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.98.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.98.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.98.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.99.w1.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.99.w2.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.experts.99.w3.weight": "model-00077-of-00195.safetensors", - "model.layers.13.block_sparse_moe.gate.weight": "model-00073-of-00195.safetensors", - "model.layers.13.input_layernorm.weight": "model-00078-of-00195.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00078-of-00195.safetensors", - "model.layers.13.residual_layernorm.weight": "model-00078-of-00195.safetensors", - "model.layers.13.residual_mlp.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.13.residual_mlp.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.13.residual_mlp.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00073-of-00195.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00073-of-00195.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00073-of-00195.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00073-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.10.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.10.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.10.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.100.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.100.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.100.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.101.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.101.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.101.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.102.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.102.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.102.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.103.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.103.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.103.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.104.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.104.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.104.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.105.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.105.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.105.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.106.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.106.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.106.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.107.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.107.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.107.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.108.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.108.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.108.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.109.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.109.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.109.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.11.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.11.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.11.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.110.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.110.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.110.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.111.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.111.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.111.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.112.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.112.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.112.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.113.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.113.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.113.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.114.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.114.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.114.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.115.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.115.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.115.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.116.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.116.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.116.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.117.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.117.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.117.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.118.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.118.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.118.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.119.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.119.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.119.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.12.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.12.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.12.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.120.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.120.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.120.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.121.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.121.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.121.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.122.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.122.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.122.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.123.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.123.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.123.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.124.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.124.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.124.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.125.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.125.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.125.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.126.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.126.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.126.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.127.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.127.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.127.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.13.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.13.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.13.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.14.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.14.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.14.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.15.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.15.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.15.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.16.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.16.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.16.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.17.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.17.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.17.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.18.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.18.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.18.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.19.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.19.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.19.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00078-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00078-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00078-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.20.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.20.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.20.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.21.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.21.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.21.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.22.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.22.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.22.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.23.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.23.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.23.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.24.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.24.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.24.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.25.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.25.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.25.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.26.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.26.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.26.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.27.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.27.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.27.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.28.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.28.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.28.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.29.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.29.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.29.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.30.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.30.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.30.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.31.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.31.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.31.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.32.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.32.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.32.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.33.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.33.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.33.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.34.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.34.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.34.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.35.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.35.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.35.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.36.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.36.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.36.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.37.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.37.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.37.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.38.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.38.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.38.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.39.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.39.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.39.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.40.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.40.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.40.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.41.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.41.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.41.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.42.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.42.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.42.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.43.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.43.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.43.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.44.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.44.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.44.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.45.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.45.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.45.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.46.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.46.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.46.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.47.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.47.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.47.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.48.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.48.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.48.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.49.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.49.w2.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.49.w3.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.50.w1.weight": "model-00080-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.50.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.50.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.51.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.51.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.51.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.52.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.52.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.52.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.53.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.53.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.53.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.54.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.54.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.54.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.55.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.55.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.55.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.56.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.56.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.56.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.57.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.57.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.57.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.58.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.58.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.58.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.59.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.59.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.59.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.60.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.60.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.60.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.61.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.61.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.61.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.62.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.62.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.62.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.63.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.63.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.63.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.64.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.64.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.64.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.65.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.65.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.65.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.66.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.66.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.66.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.67.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.67.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.67.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.68.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.68.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.68.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.69.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.69.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.69.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.70.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.70.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.70.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.71.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.71.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.71.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.72.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.72.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.72.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.73.w1.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.73.w2.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.73.w3.weight": "model-00081-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.74.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.74.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.74.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.75.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.75.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.75.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.76.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.76.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.76.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.77.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.77.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.77.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.78.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.78.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.78.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.79.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.79.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.79.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.8.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.8.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.8.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.80.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.80.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.80.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.81.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.81.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.81.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.82.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.82.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.82.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.83.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.83.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.83.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.84.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.84.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.84.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.85.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.85.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.85.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.86.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.86.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.86.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.87.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.87.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.87.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.88.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.88.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.88.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.89.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.89.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.89.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.9.w1.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.9.w2.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.9.w3.weight": "model-00079-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.90.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.90.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.90.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.91.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.91.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.91.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.92.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.92.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.92.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.93.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.93.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.93.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.94.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.94.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.94.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.95.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.95.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.95.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.96.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.96.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.96.w3.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.97.w1.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.97.w2.weight": "model-00082-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.97.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.98.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.98.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.98.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.99.w1.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.99.w2.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.experts.99.w3.weight": "model-00083-of-00195.safetensors", - "model.layers.14.block_sparse_moe.gate.weight": "model-00078-of-00195.safetensors", - "model.layers.14.input_layernorm.weight": "model-00084-of-00195.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00084-of-00195.safetensors", - "model.layers.14.residual_layernorm.weight": "model-00084-of-00195.safetensors", - "model.layers.14.residual_mlp.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.14.residual_mlp.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.14.residual_mlp.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00078-of-00195.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00078-of-00195.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00078-of-00195.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00078-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.10.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.10.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.10.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.100.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.100.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.100.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.101.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.101.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.101.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.102.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.102.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.102.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.103.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.103.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.103.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.104.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.104.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.104.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.105.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.105.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.105.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.106.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.106.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.106.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.107.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.107.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.107.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.108.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.108.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.108.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.109.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.109.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.109.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.11.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.11.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.11.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.110.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.110.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.110.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.111.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.111.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.111.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.112.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.112.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.112.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.113.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.113.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.113.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.114.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.114.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.114.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.115.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.115.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.115.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.116.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.116.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.116.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.117.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.117.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.117.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.118.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.118.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.118.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.119.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.119.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.119.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.12.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.12.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.12.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.120.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.120.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.120.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.121.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.121.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.121.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.122.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.122.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.122.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.123.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.123.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.123.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.124.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.124.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.124.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.125.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.125.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.125.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.126.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.126.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.126.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.127.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.127.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.127.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.13.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.13.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.13.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.14.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.14.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.14.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.15.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.15.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.15.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.16.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.16.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.16.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.17.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.17.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.17.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.18.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.18.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.18.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.19.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.19.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.19.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.20.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.20.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.20.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.21.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.21.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.21.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.22.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.22.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.22.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.23.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.23.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.23.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.24.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.24.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.24.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.25.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.25.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.25.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.26.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.26.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.26.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.27.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.27.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.27.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.28.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.28.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.28.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.29.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.29.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.29.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.30.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.30.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.30.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.31.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.31.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.31.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.32.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.32.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.32.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.33.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.33.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.33.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.34.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.34.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.34.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.35.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.35.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.35.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.36.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.36.w2.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.36.w3.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.37.w1.weight": "model-00085-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.37.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.37.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.38.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.38.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.38.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.39.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.39.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.39.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.40.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.40.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.40.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.41.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.41.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.41.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.42.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.42.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.42.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.43.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.43.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.43.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.44.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.44.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.44.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.45.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.45.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.45.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.46.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.46.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.46.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.47.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.47.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.47.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.48.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.48.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.48.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.49.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.49.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.49.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.50.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.50.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.50.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.51.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.51.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.51.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.52.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.52.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.52.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.53.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.53.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.53.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.54.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.54.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.54.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.55.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.55.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.55.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.56.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.56.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.56.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.57.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.57.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.57.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.58.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.58.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.58.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.59.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.59.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.59.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.60.w1.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.60.w2.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.60.w3.weight": "model-00086-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.61.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.61.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.61.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.62.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.62.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.62.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.63.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.63.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.63.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.64.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.64.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.64.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.65.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.65.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.65.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.66.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.66.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.66.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.67.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.67.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.67.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.68.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.68.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.68.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.69.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.69.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.69.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.70.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.70.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.70.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.71.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.71.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.71.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.72.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.72.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.72.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.73.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.73.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.73.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.74.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.74.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.74.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.75.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.75.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.75.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.76.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.76.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.76.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.77.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.77.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.77.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.78.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.78.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.78.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.79.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.79.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.79.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.8.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.8.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.8.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.80.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.80.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.80.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.81.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.81.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.81.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.82.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.82.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.82.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.83.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.83.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.83.w3.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.84.w1.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.84.w2.weight": "model-00087-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.84.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.85.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.85.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.85.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.86.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.86.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.86.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.87.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.87.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.87.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.88.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.88.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.88.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.89.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.89.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.89.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.9.w1.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.9.w2.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.9.w3.weight": "model-00084-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.90.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.90.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.90.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.91.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.91.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.91.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.92.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.92.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.92.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.93.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.93.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.93.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.94.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.94.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.94.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.95.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.95.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.95.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.96.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.96.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.96.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.97.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.97.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.97.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.98.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.98.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.98.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.99.w1.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.99.w2.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.experts.99.w3.weight": "model-00088-of-00195.safetensors", - "model.layers.15.block_sparse_moe.gate.weight": "model-00084-of-00195.safetensors", - "model.layers.15.input_layernorm.weight": "model-00089-of-00195.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00089-of-00195.safetensors", - "model.layers.15.residual_layernorm.weight": "model-00089-of-00195.safetensors", - "model.layers.15.residual_mlp.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.15.residual_mlp.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.15.residual_mlp.w3.weight": "model-00089-of-00195.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00084-of-00195.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00084-of-00195.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00084-of-00195.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00084-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00089-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00089-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.10.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.10.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.10.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.100.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.100.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.100.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.101.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.101.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.101.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.102.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.102.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.102.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.103.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.103.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.103.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.104.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.104.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.104.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.105.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.105.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.105.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.106.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.106.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.106.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.107.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.107.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.107.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.108.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.108.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.108.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.109.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.109.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.109.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.11.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.11.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.11.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.110.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.110.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.110.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.111.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.111.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.111.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.112.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.112.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.112.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.113.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.113.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.113.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.114.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.114.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.114.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.115.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.115.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.115.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.116.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.116.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.116.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.117.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.117.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.117.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.118.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.118.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.118.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.119.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.119.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.119.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.12.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.12.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.12.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.120.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.120.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.120.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.121.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.121.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.121.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.122.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.122.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.122.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.123.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.123.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.123.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.124.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.124.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.124.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.125.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.125.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.125.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.126.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.126.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.126.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.127.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.127.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.127.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.13.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.13.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.13.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.14.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.14.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.14.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.15.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.15.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.15.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.16.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.16.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.16.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.17.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.17.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.17.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.18.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.18.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.18.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.19.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.19.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.19.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.20.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.20.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.20.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.21.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.21.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.21.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.22.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.22.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.22.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.23.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.23.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.23.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.24.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.24.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.24.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.25.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.25.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.25.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.26.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.26.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.26.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.27.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.27.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.27.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.28.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.28.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.28.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.29.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.29.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.29.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.30.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.30.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.30.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.31.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.31.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.31.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.32.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.32.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.32.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.33.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.33.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.33.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.34.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.34.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.34.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.35.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.35.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.35.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.36.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.36.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.36.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.37.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.37.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.37.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.38.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.38.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.38.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.39.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.39.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.39.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.40.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.40.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.40.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.41.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.41.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.41.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.42.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.42.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.42.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.43.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.43.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.43.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.44.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.44.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.44.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.45.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.45.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.45.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.46.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.46.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.46.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.47.w1.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.47.w2.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.47.w3.weight": "model-00091-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.48.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.48.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.48.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.49.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.49.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.49.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.50.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.50.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.50.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.51.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.51.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.51.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.52.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.52.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.52.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.53.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.53.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.53.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.54.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.54.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.54.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.55.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.55.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.55.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.56.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.56.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.56.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.57.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.57.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.57.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.58.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.58.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.58.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.59.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.59.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.59.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.60.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.60.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.60.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.61.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.61.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.61.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.62.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.62.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.62.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.63.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.63.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.63.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.64.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.64.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.64.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.65.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.65.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.65.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.66.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.66.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.66.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.67.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.67.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.67.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.68.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.68.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.68.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.69.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.69.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.69.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.70.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.70.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.70.w3.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.71.w1.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.71.w2.weight": "model-00092-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.71.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.72.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.72.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.72.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.73.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.73.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.73.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.74.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.74.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.74.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.75.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.75.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.75.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.76.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.76.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.76.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.77.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.77.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.77.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.78.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.78.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.78.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.79.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.79.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.79.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.8.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.8.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.8.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.80.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.80.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.80.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.81.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.81.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.81.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.82.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.82.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.82.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.83.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.83.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.83.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.84.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.84.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.84.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.85.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.85.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.85.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.86.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.86.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.86.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.87.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.87.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.87.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.88.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.88.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.88.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.89.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.89.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.89.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.9.w1.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.9.w2.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.9.w3.weight": "model-00090-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.90.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.90.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.90.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.91.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.91.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.91.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.92.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.92.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.92.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.93.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.93.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.93.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.94.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.94.w2.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.94.w3.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.95.w1.weight": "model-00093-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.95.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.95.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.96.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.96.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.96.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.97.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.97.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.97.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.98.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.98.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.98.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.99.w1.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.99.w2.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.experts.99.w3.weight": "model-00094-of-00195.safetensors", - "model.layers.16.block_sparse_moe.gate.weight": "model-00089-of-00195.safetensors", - "model.layers.16.input_layernorm.weight": "model-00095-of-00195.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00095-of-00195.safetensors", - "model.layers.16.residual_layernorm.weight": "model-00095-of-00195.safetensors", - "model.layers.16.residual_mlp.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.16.residual_mlp.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.16.residual_mlp.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00089-of-00195.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00089-of-00195.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00089-of-00195.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00089-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.10.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.10.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.10.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.100.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.100.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.100.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.101.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.101.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.101.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.102.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.102.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.102.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.103.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.103.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.103.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.104.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.104.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.104.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.105.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.105.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.105.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.106.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.106.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.106.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.107.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.107.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.107.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.108.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.108.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.108.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.109.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.109.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.109.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.11.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.11.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.11.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.110.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.110.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.110.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.111.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.111.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.111.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.112.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.112.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.112.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.113.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.113.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.113.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.114.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.114.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.114.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.115.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.115.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.115.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.116.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.116.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.116.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.117.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.117.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.117.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.118.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.118.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.118.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.119.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.119.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.119.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.12.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.12.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.12.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.120.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.120.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.120.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.121.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.121.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.121.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.122.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.122.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.122.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.123.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.123.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.123.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.124.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.124.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.124.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.125.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.125.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.125.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.126.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.126.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.126.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.127.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.127.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.127.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.13.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.13.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.13.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.14.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.14.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.14.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.15.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.15.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.15.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.16.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.16.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.16.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.17.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.17.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.17.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.18.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.18.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.18.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.19.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.19.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.19.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.20.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.20.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.20.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.21.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.21.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.21.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.22.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.22.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.22.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.23.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.23.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.23.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.24.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.24.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.24.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.25.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.25.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.25.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.26.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.26.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.26.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.27.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.27.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.27.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.28.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.28.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.28.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.29.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.29.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.29.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.30.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.30.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.30.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.31.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.31.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.31.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.32.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.32.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.32.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.33.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.33.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.33.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.34.w1.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.34.w2.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.34.w3.weight": "model-00096-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.35.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.35.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.35.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.36.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.36.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.36.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.37.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.37.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.37.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.38.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.38.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.38.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.39.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.39.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.39.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.40.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.40.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.40.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.41.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.41.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.41.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.42.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.42.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.42.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.43.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.43.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.43.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.44.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.44.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.44.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.45.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.45.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.45.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.46.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.46.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.46.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.47.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.47.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.47.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.48.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.48.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.48.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.49.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.49.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.49.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.50.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.50.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.50.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.51.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.51.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.51.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.52.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.52.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.52.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.53.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.53.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.53.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.54.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.54.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.54.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.55.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.55.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.55.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.56.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.56.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.56.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.57.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.57.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.57.w3.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.58.w1.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.58.w2.weight": "model-00097-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.58.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.59.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.59.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.59.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.60.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.60.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.60.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.61.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.61.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.61.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.62.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.62.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.62.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.63.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.63.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.63.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.64.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.64.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.64.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.65.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.65.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.65.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.66.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.66.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.66.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.67.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.67.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.67.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.68.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.68.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.68.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.69.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.69.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.69.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.70.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.70.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.70.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.71.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.71.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.71.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.72.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.72.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.72.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.73.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.73.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.73.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.74.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.74.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.74.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.75.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.75.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.75.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.76.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.76.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.76.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.77.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.77.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.77.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.78.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.78.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.78.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.79.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.79.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.79.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.8.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.8.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.8.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.80.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.80.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.80.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.81.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.81.w2.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.81.w3.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.82.w1.weight": "model-00098-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.82.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.82.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.83.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.83.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.83.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.84.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.84.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.84.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.85.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.85.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.85.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.86.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.86.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.86.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.87.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.87.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.87.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.88.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.88.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.88.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.89.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.89.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.89.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.9.w1.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.9.w2.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.9.w3.weight": "model-00095-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.90.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.90.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.90.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.91.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.91.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.91.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.92.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.92.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.92.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.93.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.93.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.93.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.94.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.94.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.94.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.95.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.95.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.95.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.96.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.96.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.96.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.97.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.97.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.97.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.98.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.98.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.98.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.99.w1.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.99.w2.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.experts.99.w3.weight": "model-00099-of-00195.safetensors", - "model.layers.17.block_sparse_moe.gate.weight": "model-00095-of-00195.safetensors", - "model.layers.17.input_layernorm.weight": "model-00100-of-00195.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00100-of-00195.safetensors", - "model.layers.17.residual_layernorm.weight": "model-00100-of-00195.safetensors", - "model.layers.17.residual_mlp.w1.weight": "model-00100-of-00195.safetensors", - "model.layers.17.residual_mlp.w2.weight": "model-00100-of-00195.safetensors", - "model.layers.17.residual_mlp.w3.weight": "model-00100-of-00195.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00095-of-00195.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00095-of-00195.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00095-of-00195.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00095-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.10.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.10.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.10.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.100.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.100.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.100.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.101.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.101.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.101.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.102.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.102.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.102.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.103.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.103.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.103.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.104.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.104.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.104.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.105.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.105.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.105.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.106.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.106.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.106.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.107.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.107.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.107.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.108.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.108.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.108.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.109.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.109.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.109.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.11.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.11.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.11.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.110.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.110.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.110.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.111.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.111.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.111.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.112.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.112.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.112.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.113.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.113.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.113.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.114.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.114.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.114.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.115.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.115.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.115.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.116.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.116.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.116.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.117.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.117.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.117.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.118.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.118.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.118.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.119.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.119.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.119.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.12.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.12.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.12.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.120.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.120.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.120.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.121.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.121.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.121.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.122.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.122.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.122.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.123.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.123.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.123.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.124.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.124.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.124.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.125.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.125.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.125.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.126.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.126.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.126.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.127.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.127.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.127.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.13.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.13.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.13.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.14.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.14.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.14.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.15.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.15.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.15.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.16.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.16.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.16.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.17.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.17.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.17.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.18.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.18.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.18.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.19.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.19.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.19.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.20.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.20.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.20.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.21.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.21.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.21.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.22.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.22.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.22.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.23.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.23.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.23.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.24.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.24.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.24.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.25.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.25.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.25.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.26.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.26.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.26.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.27.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.27.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.27.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.28.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.28.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.28.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.29.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.29.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.29.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.30.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.30.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.30.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.31.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.31.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.31.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.32.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.32.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.32.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.33.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.33.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.33.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.34.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.34.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.34.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.35.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.35.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.35.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.36.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.36.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.36.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.37.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.37.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.37.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.38.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.38.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.38.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.39.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.39.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.39.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.40.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.40.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.40.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.41.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.41.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.41.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.42.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.42.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.42.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.43.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.43.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.43.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.44.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.44.w2.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.44.w3.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.45.w1.weight": "model-00102-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.45.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.45.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.46.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.46.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.46.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.47.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.47.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.47.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.48.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.48.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.48.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.49.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.49.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.49.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.50.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.50.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.50.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.51.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.51.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.51.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.52.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.52.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.52.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.53.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.53.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.53.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.54.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.54.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.54.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.55.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.55.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.55.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.56.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.56.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.56.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.57.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.57.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.57.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.58.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.58.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.58.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.59.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.59.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.59.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.60.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.60.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.60.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.61.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.61.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.61.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.62.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.62.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.62.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.63.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.63.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.63.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.64.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.64.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.64.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.65.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.65.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.65.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.66.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.66.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.66.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.67.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.67.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.67.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.68.w1.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.68.w2.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.68.w3.weight": "model-00103-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.69.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.69.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.69.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.70.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.70.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.70.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.71.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.71.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.71.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.72.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.72.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.72.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.73.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.73.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.73.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.74.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.74.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.74.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.75.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.75.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.75.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.76.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.76.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.76.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.77.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.77.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.77.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.78.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.78.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.78.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.79.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.79.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.79.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.8.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.8.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.8.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.80.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.80.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.80.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.81.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.81.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.81.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.82.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.82.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.82.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.83.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.83.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.83.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.84.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.84.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.84.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.85.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.85.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.85.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.86.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.86.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.86.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.87.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.87.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.87.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.88.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.88.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.88.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.89.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.89.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.89.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.9.w1.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.9.w2.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.9.w3.weight": "model-00101-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.90.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.90.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.90.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.91.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.91.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.91.w3.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.92.w1.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.92.w2.weight": "model-00104-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.92.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.93.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.93.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.93.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.94.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.94.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.94.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.95.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.95.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.95.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.96.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.96.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.96.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.97.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.97.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.97.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.98.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.98.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.98.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.99.w1.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.99.w2.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.experts.99.w3.weight": "model-00105-of-00195.safetensors", - "model.layers.18.block_sparse_moe.gate.weight": "model-00101-of-00195.safetensors", - "model.layers.18.input_layernorm.weight": "model-00106-of-00195.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00106-of-00195.safetensors", - "model.layers.18.residual_layernorm.weight": "model-00106-of-00195.safetensors", - "model.layers.18.residual_mlp.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.18.residual_mlp.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.18.residual_mlp.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00101-of-00195.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00101-of-00195.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00101-of-00195.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00101-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.10.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.10.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.10.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.100.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.100.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.100.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.101.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.101.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.101.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.102.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.102.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.102.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.103.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.103.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.103.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.104.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.104.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.104.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.105.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.105.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.105.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.106.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.106.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.106.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.107.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.107.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.107.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.108.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.108.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.108.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.109.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.109.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.109.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.11.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.11.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.11.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.110.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.110.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.110.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.111.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.111.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.111.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.112.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.112.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.112.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.113.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.113.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.113.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.114.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.114.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.114.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.115.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.115.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.115.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.116.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.116.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.116.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.117.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.117.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.117.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.118.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.118.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.118.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.119.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.119.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.119.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.12.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.12.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.12.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.120.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.120.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.120.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.121.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.121.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.121.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.122.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.122.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.122.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.123.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.123.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.123.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.124.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.124.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.124.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.125.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.125.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.125.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.126.w1.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.126.w2.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.126.w3.weight": "model-00111-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.127.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.127.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.127.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.13.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.13.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.13.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.14.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.14.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.14.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.15.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.15.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.15.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.16.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.16.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.16.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.17.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.17.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.17.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.18.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.18.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.18.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.19.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.19.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.19.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.20.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.20.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.20.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.21.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.21.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.21.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.22.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.22.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.22.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.23.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.23.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.23.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.24.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.24.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.24.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.25.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.25.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.25.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.26.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.26.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.26.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.27.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.27.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.27.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.28.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.28.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.28.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.29.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.29.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.29.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.30.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.30.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.30.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.31.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.31.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.31.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.32.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.32.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.32.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.33.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.33.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.33.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.34.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.34.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.34.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.35.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.35.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.35.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.36.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.36.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.36.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.37.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.37.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.37.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.38.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.38.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.38.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.39.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.39.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.39.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.40.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.40.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.40.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.41.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.41.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.41.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.42.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.42.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.42.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.43.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.43.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.43.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.44.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.44.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.44.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.45.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.45.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.45.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.46.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.46.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.46.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.47.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.47.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.47.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.48.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.48.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.48.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.49.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.49.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.49.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.50.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.50.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.50.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.51.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.51.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.51.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.52.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.52.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.52.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.53.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.53.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.53.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.54.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.54.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.54.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.55.w1.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.55.w2.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.55.w3.weight": "model-00108-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.56.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.56.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.56.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.57.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.57.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.57.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.58.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.58.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.58.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.59.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.59.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.59.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.60.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.60.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.60.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.61.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.61.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.61.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.62.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.62.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.62.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.63.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.63.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.63.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.64.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.64.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.64.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.65.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.65.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.65.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.66.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.66.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.66.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.67.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.67.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.67.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.68.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.68.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.68.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.69.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.69.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.69.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.70.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.70.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.70.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.71.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.71.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.71.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.72.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.72.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.72.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.73.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.73.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.73.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.74.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.74.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.74.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.75.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.75.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.75.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.76.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.76.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.76.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.77.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.77.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.77.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.78.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.78.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.78.w3.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.79.w1.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.79.w2.weight": "model-00109-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.79.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.8.w1.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.8.w2.weight": "model-00106-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.8.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.80.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.80.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.80.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.81.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.81.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.81.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.82.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.82.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.82.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.83.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.83.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.83.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.84.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.84.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.84.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.85.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.85.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.85.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.86.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.86.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.86.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.87.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.87.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.87.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.88.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.88.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.88.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.89.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.89.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.89.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.9.w1.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.9.w2.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.9.w3.weight": "model-00107-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.90.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.90.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.90.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.91.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.91.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.91.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.92.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.92.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.92.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.93.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.93.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.93.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.94.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.94.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.94.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.95.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.95.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.95.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.96.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.96.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.96.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.97.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.97.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.97.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.98.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.98.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.98.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.99.w1.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.99.w2.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.experts.99.w3.weight": "model-00110-of-00195.safetensors", - "model.layers.19.block_sparse_moe.gate.weight": "model-00106-of-00195.safetensors", - "model.layers.19.input_layernorm.weight": "model-00112-of-00195.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00112-of-00195.safetensors", - "model.layers.19.residual_layernorm.weight": "model-00112-of-00195.safetensors", - "model.layers.19.residual_mlp.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.19.residual_mlp.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.19.residual_mlp.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00106-of-00195.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00106-of-00195.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00106-of-00195.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00106-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.10.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.10.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.10.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.100.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.100.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.100.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.101.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.101.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.101.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.102.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.102.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.102.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.103.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.103.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.103.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.104.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.104.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.104.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.105.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.105.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.105.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.106.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.106.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.106.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.107.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.107.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.107.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.108.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.108.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.108.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.109.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.109.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.109.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.11.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.11.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.11.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.110.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.110.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.110.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.111.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.111.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.111.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.112.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.112.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.112.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.113.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.113.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.113.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.114.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.114.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.114.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.115.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.115.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.115.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.116.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.116.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.116.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.117.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.117.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.117.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.118.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.118.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.118.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.119.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.119.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.119.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.12.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.12.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.12.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.120.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.120.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.120.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.121.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.121.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.121.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.122.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.122.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.122.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.123.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.123.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.123.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.124.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.124.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.124.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.125.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.125.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.125.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.126.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.126.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.126.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.127.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.127.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.127.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.13.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.13.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.13.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.14.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.14.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.14.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.15.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.15.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.15.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.16.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.16.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.16.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.17.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.17.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.17.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.18.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.18.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.18.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.19.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.19.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.19.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.20.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.20.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.20.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.21.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.21.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.21.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.22.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.22.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.22.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.23.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.23.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.23.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.24.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.24.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.24.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.25.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.25.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.25.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.26.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.26.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.26.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.27.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.27.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.27.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.28.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.28.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.28.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.29.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.29.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.29.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.30.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.30.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.30.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.31.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.31.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.31.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.32.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.32.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.32.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.33.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.33.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.33.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.34.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.34.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.34.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.35.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.35.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.35.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.36.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.36.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.36.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.37.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.37.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.37.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.38.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.38.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.38.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.39.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.39.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.39.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.40.w1.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.40.w2.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.40.w3.weight": "model-00013-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.41.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.41.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.41.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.42.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.42.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.42.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.43.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.43.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.43.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.44.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.44.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.44.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.45.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.45.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.45.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.46.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.46.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.46.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.47.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.47.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.47.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.48.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.48.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.48.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.49.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.49.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.49.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.50.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.50.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.50.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.51.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.51.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.51.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.52.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.52.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.52.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.53.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.53.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.53.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.54.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.54.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.54.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.55.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.55.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.55.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.56.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.56.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.56.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.57.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.57.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.57.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.58.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.58.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.58.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.59.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.59.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.59.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.60.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.60.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.60.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.61.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.61.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.61.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.62.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.62.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.62.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.63.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.63.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.63.w3.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.64.w1.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.64.w2.weight": "model-00014-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.64.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.65.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.65.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.65.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.66.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.66.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.66.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.67.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.67.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.67.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.68.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.68.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.68.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.69.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.69.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.69.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.70.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.70.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.70.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.71.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.71.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.71.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.72.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.72.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.72.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.73.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.73.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.73.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.74.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.74.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.74.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.75.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.75.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.75.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.76.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.76.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.76.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.77.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.77.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.77.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.78.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.78.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.78.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.79.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.79.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.79.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.8.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.8.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.8.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.80.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.80.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.80.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.81.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.81.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.81.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.82.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.82.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.82.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.83.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.83.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.83.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.84.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.84.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.84.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.85.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.85.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.85.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.86.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.86.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.86.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.87.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.87.w2.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.87.w3.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.88.w1.weight": "model-00015-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.88.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.88.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.89.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.89.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.89.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.9.w1.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.9.w2.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.9.w3.weight": "model-00012-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.90.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.90.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.90.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.91.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.91.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.91.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.92.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.92.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.92.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.93.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.93.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.93.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.94.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.94.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.94.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.95.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.95.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.95.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.96.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.96.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.96.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.97.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.97.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.97.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.98.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.98.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.98.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.99.w1.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.99.w2.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.experts.99.w3.weight": "model-00016-of-00195.safetensors", - "model.layers.2.block_sparse_moe.gate.weight": "model-00012-of-00195.safetensors", - "model.layers.2.input_layernorm.weight": "model-00017-of-00195.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00017-of-00195.safetensors", - "model.layers.2.residual_layernorm.weight": "model-00017-of-00195.safetensors", - "model.layers.2.residual_mlp.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.2.residual_mlp.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.2.residual_mlp.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00012-of-00195.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00012-of-00195.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00012-of-00195.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00012-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.10.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.10.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.10.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.100.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.100.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.100.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.101.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.101.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.101.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.102.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.102.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.102.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.103.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.103.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.103.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.104.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.104.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.104.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.105.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.105.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.105.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.106.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.106.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.106.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.107.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.107.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.107.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.108.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.108.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.108.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.109.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.109.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.109.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.11.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.11.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.11.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.110.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.110.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.110.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.111.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.111.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.111.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.112.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.112.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.112.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.113.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.113.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.113.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.114.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.114.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.114.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.115.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.115.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.115.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.116.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.116.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.116.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.117.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.117.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.117.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.118.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.118.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.118.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.119.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.119.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.119.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.12.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.12.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.12.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.120.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.120.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.120.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.121.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.121.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.121.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.122.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.122.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.122.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.123.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.123.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.123.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.124.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.124.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.124.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.125.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.125.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.125.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.126.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.126.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.126.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.127.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.127.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.127.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.13.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.13.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.13.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.14.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.14.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.14.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.15.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.15.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.15.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.16.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.16.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.16.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.17.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.17.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.17.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.18.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.18.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.18.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.19.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.19.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.19.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.20.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.20.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.20.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.21.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.21.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.21.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.22.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.22.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.22.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.23.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.23.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.23.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.24.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.24.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.24.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.25.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.25.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.25.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.26.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.26.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.26.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.27.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.27.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.27.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.28.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.28.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.28.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.29.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.29.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.29.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.30.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.30.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.30.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.31.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.31.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.31.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.32.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.32.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.32.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.33.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.33.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.33.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.34.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.34.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.34.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.35.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.35.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.35.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.36.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.36.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.36.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.37.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.37.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.37.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.38.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.38.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.38.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.39.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.39.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.39.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.40.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.40.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.40.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.41.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.41.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.41.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.42.w1.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.42.w2.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.42.w3.weight": "model-00113-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.43.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.43.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.43.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.44.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.44.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.44.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.45.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.45.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.45.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.46.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.46.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.46.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.47.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.47.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.47.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.48.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.48.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.48.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.49.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.49.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.49.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.50.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.50.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.50.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.51.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.51.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.51.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.52.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.52.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.52.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.53.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.53.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.53.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.54.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.54.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.54.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.55.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.55.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.55.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.56.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.56.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.56.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.57.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.57.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.57.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.58.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.58.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.58.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.59.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.59.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.59.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.60.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.60.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.60.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.61.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.61.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.61.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.62.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.62.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.62.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.63.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.63.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.63.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.64.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.64.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.64.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.65.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.65.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.65.w3.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.66.w1.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.66.w2.weight": "model-00114-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.66.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.67.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.67.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.67.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.68.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.68.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.68.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.69.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.69.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.69.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.70.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.70.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.70.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.71.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.71.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.71.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.72.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.72.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.72.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.73.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.73.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.73.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.74.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.74.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.74.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.75.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.75.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.75.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.76.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.76.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.76.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.77.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.77.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.77.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.78.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.78.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.78.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.79.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.79.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.79.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.8.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.8.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.8.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.80.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.80.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.80.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.81.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.81.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.81.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.82.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.82.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.82.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.83.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.83.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.83.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.84.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.84.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.84.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.85.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.85.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.85.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.86.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.86.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.86.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.87.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.87.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.87.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.88.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.88.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.88.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.89.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.89.w2.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.89.w3.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.9.w1.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.9.w2.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.9.w3.weight": "model-00112-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.90.w1.weight": "model-00115-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.90.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.90.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.91.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.91.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.91.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.92.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.92.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.92.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.93.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.93.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.93.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.94.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.94.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.94.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.95.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.95.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.95.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.96.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.96.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.96.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.97.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.97.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.97.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.98.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.98.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.98.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.99.w1.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.99.w2.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.experts.99.w3.weight": "model-00116-of-00195.safetensors", - "model.layers.20.block_sparse_moe.gate.weight": "model-00112-of-00195.safetensors", - "model.layers.20.input_layernorm.weight": "model-00117-of-00195.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00117-of-00195.safetensors", - "model.layers.20.residual_layernorm.weight": "model-00117-of-00195.safetensors", - "model.layers.20.residual_mlp.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.20.residual_mlp.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.20.residual_mlp.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00112-of-00195.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00112-of-00195.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00112-of-00195.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00112-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.10.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.10.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.10.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.100.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.100.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.100.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.101.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.101.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.101.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.102.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.102.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.102.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.103.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.103.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.103.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.104.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.104.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.104.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.105.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.105.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.105.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.106.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.106.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.106.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.107.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.107.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.107.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.108.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.108.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.108.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.109.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.109.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.109.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.11.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.11.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.11.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.110.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.110.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.110.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.111.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.111.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.111.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.112.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.112.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.112.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.113.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.113.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.113.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.114.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.114.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.114.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.115.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.115.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.115.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.116.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.116.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.116.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.117.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.117.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.117.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.118.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.118.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.118.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.119.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.119.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.119.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.12.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.12.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.12.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.120.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.120.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.120.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.121.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.121.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.121.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.122.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.122.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.122.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.123.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.123.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.123.w3.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.124.w1.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.124.w2.weight": "model-00122-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.124.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.125.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.125.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.125.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.126.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.126.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.126.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.127.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.127.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.127.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.13.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.13.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.13.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.14.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.14.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.14.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.15.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.15.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.15.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.16.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.16.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.16.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.17.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.17.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.17.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.18.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.18.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.18.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.19.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.19.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.19.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.20.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.20.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.20.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.21.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.21.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.21.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.22.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.22.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.22.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.23.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.23.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.23.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.24.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.24.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.24.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.25.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.25.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.25.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.26.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.26.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.26.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.27.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.27.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.27.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.28.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.28.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.28.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.29.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.29.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.29.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.30.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.30.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.30.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.31.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.31.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.31.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.32.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.32.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.32.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.33.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.33.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.33.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.34.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.34.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.34.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.35.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.35.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.35.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.36.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.36.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.36.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.37.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.37.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.37.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.38.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.38.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.38.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.39.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.39.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.39.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.40.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.40.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.40.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.41.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.41.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.41.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.42.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.42.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.42.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.43.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.43.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.43.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.44.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.44.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.44.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.45.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.45.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.45.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.46.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.46.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.46.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.47.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.47.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.47.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.48.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.48.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.48.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.49.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.49.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.49.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.50.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.50.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.50.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.51.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.51.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.51.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.52.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.52.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.52.w3.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.53.w1.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.53.w2.weight": "model-00119-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.53.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.54.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.54.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.54.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.55.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.55.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.55.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.56.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.56.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.56.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.57.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.57.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.57.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.58.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.58.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.58.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.59.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.59.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.59.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00117-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.60.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.60.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.60.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.61.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.61.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.61.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.62.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.62.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.62.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.63.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.63.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.63.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.64.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.64.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.64.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.65.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.65.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.65.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.66.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.66.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.66.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.67.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.67.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.67.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.68.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.68.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.68.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.69.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.69.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.69.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.70.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.70.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.70.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.71.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.71.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.71.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.72.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.72.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.72.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.73.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.73.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.73.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.74.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.74.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.74.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.75.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.75.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.75.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.76.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.76.w2.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.76.w3.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.77.w1.weight": "model-00120-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.77.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.77.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.78.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.78.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.78.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.79.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.79.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.79.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.8.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.8.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.8.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.80.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.80.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.80.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.81.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.81.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.81.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.82.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.82.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.82.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.83.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.83.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.83.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.84.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.84.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.84.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.85.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.85.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.85.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.86.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.86.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.86.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.87.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.87.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.87.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.88.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.88.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.88.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.89.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.89.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.89.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.9.w1.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.9.w2.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.9.w3.weight": "model-00118-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.90.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.90.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.90.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.91.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.91.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.91.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.92.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.92.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.92.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.93.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.93.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.93.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.94.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.94.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.94.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.95.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.95.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.95.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.96.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.96.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.96.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.97.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.97.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.97.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.98.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.98.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.98.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.99.w1.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.99.w2.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.experts.99.w3.weight": "model-00121-of-00195.safetensors", - "model.layers.21.block_sparse_moe.gate.weight": "model-00117-of-00195.safetensors", - "model.layers.21.input_layernorm.weight": "model-00123-of-00195.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00123-of-00195.safetensors", - "model.layers.21.residual_layernorm.weight": "model-00123-of-00195.safetensors", - "model.layers.21.residual_mlp.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.21.residual_mlp.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.21.residual_mlp.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00117-of-00195.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00117-of-00195.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00117-of-00195.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00117-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.10.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.10.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.10.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.100.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.100.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.100.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.101.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.101.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.101.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.102.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.102.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.102.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.103.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.103.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.103.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.104.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.104.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.104.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.105.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.105.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.105.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.106.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.106.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.106.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.107.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.107.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.107.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.108.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.108.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.108.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.109.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.109.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.109.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.11.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.11.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.11.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.110.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.110.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.110.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.111.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.111.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.111.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.112.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.112.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.112.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.113.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.113.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.113.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.114.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.114.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.114.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.115.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.115.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.115.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.116.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.116.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.116.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.117.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.117.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.117.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.118.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.118.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.118.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.119.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.119.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.119.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.12.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.12.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.12.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.120.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.120.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.120.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.121.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.121.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.121.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.122.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.122.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.122.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.123.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.123.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.123.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.124.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.124.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.124.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.125.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.125.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.125.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.126.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.126.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.126.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.127.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.127.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.127.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.13.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.13.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.13.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.14.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.14.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.14.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.15.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.15.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.15.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.16.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.16.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.16.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.17.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.17.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.17.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.18.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.18.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.18.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.19.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.19.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.19.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.20.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.20.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.20.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.21.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.21.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.21.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.22.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.22.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.22.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.23.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.23.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.23.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.24.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.24.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.24.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.25.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.25.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.25.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.26.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.26.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.26.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.27.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.27.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.27.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.28.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.28.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.28.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.29.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.29.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.29.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.30.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.30.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.30.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.31.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.31.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.31.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.32.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.32.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.32.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.33.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.33.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.33.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.34.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.34.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.34.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.35.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.35.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.35.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.36.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.36.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.36.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.37.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.37.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.37.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.38.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.38.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.38.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.39.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.39.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.39.w3.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.40.w1.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.40.w2.weight": "model-00124-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.40.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.41.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.41.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.41.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.42.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.42.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.42.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.43.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.43.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.43.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.44.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.44.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.44.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.45.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.45.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.45.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.46.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.46.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.46.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.47.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.47.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.47.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.48.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.48.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.48.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.49.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.49.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.49.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.50.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.50.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.50.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.51.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.51.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.51.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.52.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.52.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.52.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.53.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.53.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.53.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.54.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.54.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.54.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.55.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.55.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.55.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.56.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.56.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.56.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.57.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.57.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.57.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.58.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.58.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.58.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.59.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.59.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.59.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.60.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.60.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.60.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.61.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.61.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.61.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.62.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.62.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.62.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.63.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.63.w2.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.63.w3.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.64.w1.weight": "model-00125-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.64.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.64.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.65.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.65.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.65.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.66.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.66.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.66.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.67.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.67.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.67.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.68.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.68.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.68.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.69.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.69.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.69.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.70.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.70.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.70.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.71.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.71.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.71.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.72.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.72.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.72.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.73.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.73.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.73.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.74.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.74.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.74.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.75.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.75.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.75.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.76.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.76.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.76.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.77.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.77.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.77.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.78.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.78.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.78.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.79.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.79.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.79.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.8.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.8.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.8.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.80.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.80.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.80.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.81.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.81.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.81.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.82.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.82.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.82.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.83.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.83.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.83.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.84.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.84.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.84.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.85.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.85.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.85.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.86.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.86.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.86.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.87.w1.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.87.w2.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.87.w3.weight": "model-00126-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.88.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.88.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.88.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.89.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.89.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.89.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.9.w1.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.9.w2.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.9.w3.weight": "model-00123-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.90.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.90.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.90.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.91.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.91.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.91.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.92.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.92.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.92.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.93.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.93.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.93.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.94.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.94.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.94.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.95.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.95.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.95.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.96.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.96.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.96.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.97.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.97.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.97.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.98.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.98.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.98.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.99.w1.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.99.w2.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.experts.99.w3.weight": "model-00127-of-00195.safetensors", - "model.layers.22.block_sparse_moe.gate.weight": "model-00123-of-00195.safetensors", - "model.layers.22.input_layernorm.weight": "model-00128-of-00195.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00128-of-00195.safetensors", - "model.layers.22.residual_layernorm.weight": "model-00128-of-00195.safetensors", - "model.layers.22.residual_mlp.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.22.residual_mlp.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.22.residual_mlp.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00123-of-00195.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00123-of-00195.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00123-of-00195.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00123-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.10.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.10.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.10.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.100.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.100.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.100.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.101.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.101.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.101.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.102.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.102.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.102.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.103.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.103.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.103.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.104.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.104.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.104.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.105.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.105.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.105.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.106.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.106.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.106.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.107.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.107.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.107.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.108.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.108.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.108.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.109.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.109.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.109.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.11.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.11.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.11.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.110.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.110.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.110.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.111.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.111.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.111.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.112.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.112.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.112.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.113.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.113.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.113.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.114.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.114.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.114.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.115.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.115.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.115.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.116.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.116.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.116.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.117.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.117.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.117.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.118.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.118.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.118.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.119.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.119.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.119.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.12.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.12.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.12.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.120.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.120.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.120.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.121.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.121.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.121.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.122.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.122.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.122.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.123.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.123.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.123.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.124.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.124.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.124.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.125.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.125.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.125.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.126.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.126.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.126.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.127.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.127.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.127.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.13.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.13.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.13.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.14.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.14.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.14.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.15.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.15.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.15.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.16.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.16.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.16.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.17.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.17.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.17.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.18.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.18.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.18.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.19.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.19.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.19.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.20.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.20.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.20.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.21.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.21.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.21.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.22.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.22.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.22.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.23.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.23.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.23.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.24.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.24.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.24.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.25.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.25.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.25.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.26.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.26.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.26.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.27.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.27.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.27.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.28.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.28.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.28.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.29.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.29.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.29.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00128-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.30.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.30.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.30.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.31.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.31.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.31.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.32.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.32.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.32.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.33.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.33.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.33.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.34.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.34.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.34.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.35.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.35.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.35.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.36.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.36.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.36.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.37.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.37.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.37.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.38.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.38.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.38.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.39.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.39.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.39.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.40.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.40.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.40.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.41.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.41.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.41.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.42.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.42.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.42.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.43.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.43.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.43.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.44.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.44.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.44.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.45.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.45.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.45.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.46.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.46.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.46.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.47.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.47.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.47.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.48.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.48.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.48.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.49.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.49.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.49.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.50.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.50.w2.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.50.w3.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.51.w1.weight": "model-00130-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.51.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.51.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.52.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.52.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.52.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.53.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.53.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.53.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.54.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.54.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.54.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.55.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.55.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.55.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.56.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.56.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.56.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.57.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.57.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.57.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.58.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.58.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.58.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.59.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.59.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.59.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.60.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.60.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.60.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.61.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.61.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.61.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.62.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.62.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.62.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.63.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.63.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.63.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.64.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.64.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.64.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.65.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.65.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.65.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.66.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.66.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.66.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.67.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.67.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.67.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.68.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.68.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.68.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.69.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.69.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.69.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.70.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.70.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.70.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.71.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.71.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.71.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.72.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.72.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.72.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.73.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.73.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.73.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.74.w1.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.74.w2.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.74.w3.weight": "model-00131-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.75.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.75.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.75.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.76.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.76.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.76.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.77.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.77.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.77.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.78.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.78.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.78.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.79.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.79.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.79.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.8.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.8.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.8.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.80.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.80.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.80.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.81.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.81.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.81.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.82.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.82.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.82.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.83.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.83.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.83.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.84.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.84.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.84.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.85.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.85.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.85.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.86.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.86.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.86.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.87.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.87.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.87.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.88.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.88.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.88.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.89.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.89.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.89.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.9.w1.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.9.w2.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.9.w3.weight": "model-00129-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.90.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.90.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.90.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.91.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.91.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.91.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.92.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.92.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.92.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.93.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.93.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.93.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.94.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.94.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.94.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.95.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.95.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.95.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.96.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.96.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.96.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.97.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.97.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.97.w3.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.98.w1.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.98.w2.weight": "model-00132-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.98.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.99.w1.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.99.w2.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.experts.99.w3.weight": "model-00133-of-00195.safetensors", - "model.layers.23.block_sparse_moe.gate.weight": "model-00128-of-00195.safetensors", - "model.layers.23.input_layernorm.weight": "model-00134-of-00195.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00134-of-00195.safetensors", - "model.layers.23.residual_layernorm.weight": "model-00134-of-00195.safetensors", - "model.layers.23.residual_mlp.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.23.residual_mlp.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.23.residual_mlp.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00128-of-00195.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00128-of-00195.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00128-of-00195.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00128-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.10.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.10.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.10.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.100.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.100.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.100.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.101.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.101.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.101.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.102.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.102.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.102.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.103.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.103.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.103.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.104.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.104.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.104.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.105.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.105.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.105.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.106.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.106.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.106.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.107.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.107.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.107.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.108.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.108.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.108.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.109.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.109.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.109.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.11.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.11.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.11.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.110.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.110.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.110.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.111.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.111.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.111.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.112.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.112.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.112.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.113.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.113.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.113.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.114.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.114.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.114.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.115.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.115.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.115.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.116.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.116.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.116.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.117.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.117.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.117.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.118.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.118.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.118.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.119.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.119.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.119.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.12.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.12.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.12.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.120.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.120.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.120.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.121.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.121.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.121.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.122.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.122.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.122.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.123.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.123.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.123.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.124.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.124.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.124.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.125.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.125.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.125.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.126.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.126.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.126.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.127.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.127.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.127.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.13.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.13.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.13.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.14.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.14.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.14.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.15.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.15.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.15.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.16.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.16.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.16.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.17.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.17.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.17.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.18.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.18.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.18.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.19.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.19.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.19.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.20.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.20.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.20.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.21.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.21.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.21.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.22.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.22.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.22.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.23.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.23.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.23.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.24.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.24.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.24.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.25.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.25.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.25.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.26.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.26.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.26.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.27.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.27.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.27.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.28.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.28.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.28.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.29.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.29.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.29.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.30.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.30.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.30.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.31.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.31.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.31.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.32.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.32.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.32.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.33.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.33.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.33.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.34.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.34.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.34.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.35.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.35.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.35.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.36.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.36.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.36.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.37.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.37.w2.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.37.w3.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.38.w1.weight": "model-00135-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.38.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.38.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.39.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.39.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.39.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.40.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.40.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.40.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.41.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.41.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.41.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.42.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.42.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.42.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.43.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.43.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.43.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.44.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.44.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.44.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.45.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.45.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.45.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.46.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.46.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.46.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.47.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.47.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.47.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.48.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.48.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.48.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.49.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.49.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.49.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.50.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.50.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.50.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.51.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.51.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.51.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.52.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.52.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.52.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.53.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.53.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.53.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.54.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.54.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.54.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.55.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.55.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.55.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.56.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.56.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.56.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.57.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.57.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.57.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.58.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.58.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.58.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.59.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.59.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.59.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.60.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.60.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.60.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.61.w1.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.61.w2.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.61.w3.weight": "model-00136-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.62.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.62.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.62.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.63.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.63.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.63.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.64.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.64.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.64.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.65.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.65.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.65.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.66.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.66.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.66.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.67.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.67.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.67.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.68.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.68.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.68.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.69.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.69.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.69.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.70.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.70.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.70.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.71.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.71.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.71.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.72.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.72.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.72.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.73.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.73.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.73.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.74.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.74.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.74.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.75.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.75.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.75.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.76.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.76.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.76.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.77.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.77.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.77.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.78.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.78.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.78.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.79.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.79.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.79.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.8.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.8.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.8.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.80.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.80.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.80.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.81.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.81.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.81.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.82.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.82.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.82.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.83.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.83.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.83.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.84.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.84.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.84.w3.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.85.w1.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.85.w2.weight": "model-00137-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.85.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.86.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.86.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.86.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.87.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.87.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.87.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.88.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.88.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.88.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.89.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.89.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.89.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.9.w1.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.9.w2.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.9.w3.weight": "model-00134-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.90.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.90.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.90.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.91.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.91.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.91.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.92.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.92.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.92.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.93.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.93.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.93.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.94.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.94.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.94.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.95.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.95.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.95.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.96.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.96.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.96.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.97.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.97.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.97.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.98.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.98.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.98.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.99.w1.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.99.w2.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.experts.99.w3.weight": "model-00138-of-00195.safetensors", - "model.layers.24.block_sparse_moe.gate.weight": "model-00134-of-00195.safetensors", - "model.layers.24.input_layernorm.weight": "model-00139-of-00195.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00139-of-00195.safetensors", - "model.layers.24.residual_layernorm.weight": "model-00139-of-00195.safetensors", - "model.layers.24.residual_mlp.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.24.residual_mlp.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.24.residual_mlp.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00134-of-00195.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00134-of-00195.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00134-of-00195.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00134-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00139-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00139-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00139-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.10.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.10.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.10.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.100.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.100.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.100.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.101.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.101.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.101.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.102.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.102.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.102.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.103.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.103.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.103.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.104.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.104.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.104.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.105.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.105.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.105.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.106.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.106.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.106.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.107.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.107.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.107.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.108.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.108.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.108.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.109.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.109.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.109.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.11.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.11.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.11.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.110.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.110.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.110.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.111.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.111.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.111.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.112.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.112.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.112.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.113.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.113.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.113.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.114.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.114.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.114.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.115.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.115.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.115.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.116.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.116.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.116.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.117.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.117.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.117.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.118.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.118.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.118.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.119.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.119.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.119.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.12.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.12.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.12.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.120.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.120.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.120.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.121.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.121.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.121.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.122.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.122.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.122.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.123.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.123.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.123.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.124.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.124.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.124.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.125.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.125.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.125.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.126.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.126.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.126.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.127.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.127.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.127.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.13.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.13.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.13.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.14.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.14.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.14.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.15.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.15.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.15.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.16.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.16.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.16.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.17.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.17.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.17.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.18.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.18.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.18.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.19.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.19.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.19.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.20.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.20.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.20.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.21.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.21.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.21.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.22.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.22.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.22.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.23.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.23.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.23.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.24.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.24.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.24.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.25.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.25.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.25.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.26.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.26.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.26.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.27.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.27.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.27.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.28.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.28.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.28.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.29.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.29.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.29.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.30.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.30.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.30.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.31.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.31.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.31.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.32.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.32.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.32.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.33.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.33.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.33.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.34.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.34.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.34.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.35.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.35.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.35.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.36.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.36.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.36.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.37.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.37.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.37.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.38.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.38.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.38.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.39.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.39.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.39.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.40.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.40.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.40.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.41.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.41.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.41.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.42.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.42.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.42.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.43.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.43.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.43.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.44.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.44.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.44.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.45.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.45.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.45.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.46.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.46.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.46.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.47.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.47.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.47.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.48.w1.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.48.w2.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.48.w3.weight": "model-00141-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.49.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.49.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.49.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.50.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.50.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.50.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.51.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.51.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.51.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.52.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.52.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.52.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.53.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.53.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.53.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.54.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.54.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.54.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.55.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.55.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.55.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.56.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.56.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.56.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.57.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.57.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.57.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.58.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.58.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.58.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.59.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.59.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.59.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.60.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.60.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.60.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.61.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.61.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.61.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.62.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.62.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.62.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.63.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.63.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.63.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.64.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.64.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.64.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.65.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.65.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.65.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.66.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.66.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.66.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.67.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.67.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.67.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.68.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.68.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.68.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.69.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.69.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.69.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.70.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.70.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.70.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.71.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.71.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.71.w3.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.72.w1.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.72.w2.weight": "model-00142-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.72.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.73.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.73.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.73.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.74.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.74.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.74.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.75.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.75.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.75.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.76.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.76.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.76.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.77.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.77.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.77.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.78.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.78.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.78.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.79.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.79.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.79.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.8.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.8.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.8.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.80.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.80.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.80.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.81.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.81.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.81.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.82.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.82.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.82.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.83.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.83.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.83.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.84.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.84.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.84.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.85.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.85.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.85.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.86.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.86.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.86.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.87.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.87.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.87.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.88.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.88.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.88.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.89.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.89.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.89.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.9.w1.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.9.w2.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.9.w3.weight": "model-00140-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.90.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.90.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.90.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.91.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.91.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.91.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.92.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.92.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.92.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.93.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.93.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.93.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.94.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.94.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.94.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.95.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.95.w2.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.95.w3.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.96.w1.weight": "model-00143-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.96.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.96.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.97.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.97.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.97.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.98.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.98.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.98.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.99.w1.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.99.w2.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.experts.99.w3.weight": "model-00144-of-00195.safetensors", - "model.layers.25.block_sparse_moe.gate.weight": "model-00139-of-00195.safetensors", - "model.layers.25.input_layernorm.weight": "model-00145-of-00195.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00145-of-00195.safetensors", - "model.layers.25.residual_layernorm.weight": "model-00145-of-00195.safetensors", - "model.layers.25.residual_mlp.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.25.residual_mlp.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.25.residual_mlp.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00139-of-00195.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00139-of-00195.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00139-of-00195.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00139-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.10.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.10.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.10.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.100.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.100.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.100.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.101.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.101.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.101.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.102.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.102.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.102.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.103.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.103.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.103.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.104.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.104.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.104.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.105.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.105.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.105.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.106.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.106.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.106.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.107.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.107.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.107.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.108.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.108.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.108.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.109.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.109.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.109.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.11.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.11.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.11.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.110.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.110.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.110.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.111.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.111.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.111.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.112.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.112.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.112.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.113.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.113.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.113.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.114.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.114.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.114.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.115.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.115.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.115.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.116.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.116.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.116.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.117.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.117.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.117.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.118.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.118.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.118.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.119.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.119.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.119.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.12.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.12.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.12.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.120.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.120.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.120.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.121.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.121.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.121.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.122.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.122.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.122.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.123.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.123.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.123.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.124.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.124.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.124.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.125.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.125.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.125.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.126.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.126.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.126.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.127.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.127.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.127.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.13.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.13.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.13.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.14.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.14.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.14.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.15.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.15.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.15.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.16.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.16.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.16.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.17.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.17.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.17.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.18.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.18.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.18.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.19.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.19.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.19.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.20.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.20.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.20.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.21.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.21.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.21.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.22.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.22.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.22.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.23.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.23.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.23.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.24.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.24.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.24.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.25.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.25.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.25.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.26.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.26.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.26.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.27.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.27.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.27.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.28.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.28.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.28.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.29.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.29.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.29.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.30.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.30.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.30.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.31.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.31.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.31.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.32.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.32.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.32.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.33.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.33.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.33.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.34.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.34.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.34.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.35.w1.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.35.w2.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.35.w3.weight": "model-00146-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.36.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.36.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.36.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.37.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.37.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.37.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.38.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.38.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.38.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.39.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.39.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.39.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.40.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.40.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.40.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.41.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.41.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.41.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.42.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.42.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.42.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.43.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.43.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.43.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.44.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.44.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.44.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.45.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.45.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.45.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.46.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.46.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.46.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.47.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.47.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.47.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.48.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.48.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.48.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.49.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.49.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.49.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.50.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.50.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.50.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.51.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.51.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.51.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.52.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.52.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.52.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.53.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.53.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.53.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.54.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.54.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.54.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.55.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.55.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.55.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.56.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.56.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.56.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.57.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.57.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.57.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.58.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.58.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.58.w3.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.59.w1.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.59.w2.weight": "model-00147-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.59.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.60.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.60.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.60.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.61.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.61.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.61.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.62.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.62.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.62.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.63.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.63.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.63.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.64.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.64.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.64.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.65.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.65.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.65.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.66.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.66.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.66.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.67.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.67.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.67.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.68.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.68.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.68.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.69.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.69.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.69.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.70.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.70.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.70.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.71.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.71.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.71.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.72.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.72.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.72.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.73.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.73.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.73.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.74.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.74.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.74.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.75.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.75.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.75.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.76.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.76.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.76.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.77.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.77.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.77.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.78.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.78.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.78.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.79.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.79.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.79.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.8.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.8.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.8.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.80.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.80.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.80.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.81.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.81.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.81.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.82.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.82.w2.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.82.w3.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.83.w1.weight": "model-00148-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.83.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.83.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.84.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.84.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.84.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.85.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.85.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.85.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.86.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.86.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.86.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.87.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.87.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.87.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.88.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.88.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.88.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.89.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.89.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.89.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.9.w1.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.9.w2.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.9.w3.weight": "model-00145-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.90.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.90.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.90.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.91.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.91.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.91.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.92.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.92.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.92.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.93.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.93.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.93.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.94.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.94.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.94.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.95.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.95.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.95.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.96.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.96.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.96.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.97.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.97.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.97.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.98.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.98.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.98.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.99.w1.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.99.w2.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.experts.99.w3.weight": "model-00149-of-00195.safetensors", - "model.layers.26.block_sparse_moe.gate.weight": "model-00145-of-00195.safetensors", - "model.layers.26.input_layernorm.weight": "model-00150-of-00195.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00150-of-00195.safetensors", - "model.layers.26.residual_layernorm.weight": "model-00150-of-00195.safetensors", - "model.layers.26.residual_mlp.w1.weight": "model-00150-of-00195.safetensors", - "model.layers.26.residual_mlp.w2.weight": "model-00150-of-00195.safetensors", - "model.layers.26.residual_mlp.w3.weight": "model-00150-of-00195.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00145-of-00195.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00145-of-00195.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00145-of-00195.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00145-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.10.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.10.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.10.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.100.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.100.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.100.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.101.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.101.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.101.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.102.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.102.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.102.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.103.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.103.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.103.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.104.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.104.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.104.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.105.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.105.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.105.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.106.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.106.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.106.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.107.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.107.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.107.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.108.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.108.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.108.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.109.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.109.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.109.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.11.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.11.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.11.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.110.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.110.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.110.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.111.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.111.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.111.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.112.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.112.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.112.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.113.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.113.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.113.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.114.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.114.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.114.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.115.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.115.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.115.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.116.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.116.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.116.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.117.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.117.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.117.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.118.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.118.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.118.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.119.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.119.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.119.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.12.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.12.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.12.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.120.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.120.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.120.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.121.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.121.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.121.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.122.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.122.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.122.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.123.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.123.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.123.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.124.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.124.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.124.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.125.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.125.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.125.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.126.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.126.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.126.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.127.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.127.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.127.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.13.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.13.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.13.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.14.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.14.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.14.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.15.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.15.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.15.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.16.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.16.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.16.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.17.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.17.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.17.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.18.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.18.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.18.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.19.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.19.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.19.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.20.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.20.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.20.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.21.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.21.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.21.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.22.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.22.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.22.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.23.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.23.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.23.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.24.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.24.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.24.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.25.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.25.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.25.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.26.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.26.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.26.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.27.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.27.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.27.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.28.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.28.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.28.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.29.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.29.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.29.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.30.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.30.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.30.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.31.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.31.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.31.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.32.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.32.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.32.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.33.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.33.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.33.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.34.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.34.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.34.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.35.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.35.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.35.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.36.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.36.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.36.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.37.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.37.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.37.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.38.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.38.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.38.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.39.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.39.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.39.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.40.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.40.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.40.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.41.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.41.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.41.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.42.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.42.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.42.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.43.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.43.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.43.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.44.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.44.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.44.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.45.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.45.w2.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.45.w3.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.46.w1.weight": "model-00152-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.46.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.46.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.47.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.47.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.47.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.48.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.48.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.48.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.49.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.49.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.49.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.50.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.50.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.50.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.51.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.51.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.51.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.52.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.52.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.52.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.53.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.53.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.53.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.54.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.54.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.54.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.55.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.55.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.55.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.56.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.56.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.56.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.57.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.57.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.57.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.58.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.58.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.58.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.59.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.59.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.59.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.60.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.60.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.60.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.61.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.61.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.61.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.62.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.62.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.62.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.63.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.63.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.63.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.64.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.64.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.64.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.65.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.65.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.65.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.66.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.66.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.66.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.67.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.67.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.67.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.68.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.68.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.68.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.69.w1.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.69.w2.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.69.w3.weight": "model-00153-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.70.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.70.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.70.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.71.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.71.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.71.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.72.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.72.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.72.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.73.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.73.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.73.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.74.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.74.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.74.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.75.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.75.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.75.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.76.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.76.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.76.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.77.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.77.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.77.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.78.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.78.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.78.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.79.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.79.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.79.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.8.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.8.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.8.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.80.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.80.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.80.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.81.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.81.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.81.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.82.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.82.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.82.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.83.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.83.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.83.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.84.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.84.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.84.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.85.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.85.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.85.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.86.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.86.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.86.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.87.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.87.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.87.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.88.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.88.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.88.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.89.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.89.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.89.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.9.w1.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.9.w2.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.9.w3.weight": "model-00151-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.90.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.90.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.90.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.91.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.91.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.91.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.92.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.92.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.92.w3.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.93.w1.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.93.w2.weight": "model-00154-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.93.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.94.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.94.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.94.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.95.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.95.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.95.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.96.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.96.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.96.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.97.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.97.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.97.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.98.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.98.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.98.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.99.w1.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.99.w2.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.experts.99.w3.weight": "model-00155-of-00195.safetensors", - "model.layers.27.block_sparse_moe.gate.weight": "model-00151-of-00195.safetensors", - "model.layers.27.input_layernorm.weight": "model-00156-of-00195.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00156-of-00195.safetensors", - "model.layers.27.residual_layernorm.weight": "model-00156-of-00195.safetensors", - "model.layers.27.residual_mlp.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.27.residual_mlp.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.27.residual_mlp.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00150-of-00195.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00151-of-00195.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00150-of-00195.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00151-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.10.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.10.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.10.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.100.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.100.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.100.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.101.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.101.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.101.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.102.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.102.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.102.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.103.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.103.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.103.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.104.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.104.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.104.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.105.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.105.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.105.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.106.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.106.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.106.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.107.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.107.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.107.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.108.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.108.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.108.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.109.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.109.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.109.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.11.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.11.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.11.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.110.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.110.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.110.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.111.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.111.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.111.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.112.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.112.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.112.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.113.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.113.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.113.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.114.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.114.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.114.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.115.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.115.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.115.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.116.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.116.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.116.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.117.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.117.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.117.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.118.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.118.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.118.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.119.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.119.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.119.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.12.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.12.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.12.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.120.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.120.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.120.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.121.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.121.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.121.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.122.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.122.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.122.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.123.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.123.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.123.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.124.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.124.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.124.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.125.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.125.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.125.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.126.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.126.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.126.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.127.w1.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.127.w2.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.127.w3.weight": "model-00161-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.13.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.13.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.13.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.14.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.14.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.14.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.15.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.15.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.15.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.16.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.16.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.16.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.17.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.17.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.17.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.18.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.18.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.18.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.19.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.19.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.19.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.20.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.20.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.20.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.21.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.21.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.21.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.22.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.22.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.22.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.23.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.23.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.23.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.24.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.24.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.24.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.25.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.25.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.25.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.26.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.26.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.26.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.27.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.27.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.27.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.28.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.28.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.28.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.29.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.29.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.29.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.30.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.30.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.30.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.31.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.31.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.31.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.32.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.32.w2.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.32.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.33.w1.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.33.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.33.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.34.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.34.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.34.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.35.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.35.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.35.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.36.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.36.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.36.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.37.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.37.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.37.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.38.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.38.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.38.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.39.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.39.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.39.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.40.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.40.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.40.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.41.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.41.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.41.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.42.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.42.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.42.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.43.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.43.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.43.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.44.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.44.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.44.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.45.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.45.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.45.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.46.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.46.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.46.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.47.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.47.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.47.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.48.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.48.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.48.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.49.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.49.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.49.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.50.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.50.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.50.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.51.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.51.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.51.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.52.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.52.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.52.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.53.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.53.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.53.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.54.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.54.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.54.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.55.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.55.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.55.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.56.w1.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.56.w2.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.56.w3.weight": "model-00158-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.57.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.57.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.57.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.58.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.58.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.58.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.59.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.59.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.59.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.60.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.60.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.60.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.61.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.61.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.61.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.62.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.62.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.62.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.63.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.63.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.63.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.64.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.64.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.64.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.65.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.65.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.65.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.66.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.66.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.66.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.67.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.67.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.67.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.68.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.68.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.68.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.69.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.69.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.69.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.70.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.70.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.70.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.71.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.71.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.71.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.72.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.72.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.72.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.73.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.73.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.73.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.74.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.74.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.74.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.75.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.75.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.75.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.76.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.76.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.76.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.77.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.77.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.77.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.78.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.78.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.78.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.79.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.79.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.79.w3.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.8.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.8.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.8.w3.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.80.w1.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.80.w2.weight": "model-00159-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.80.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.81.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.81.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.81.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.82.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.82.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.82.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.83.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.83.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.83.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.84.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.84.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.84.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.85.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.85.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.85.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.86.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.86.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.86.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.87.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.87.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.87.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.88.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.88.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.88.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.89.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.89.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.89.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.9.w1.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.9.w2.weight": "model-00156-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.9.w3.weight": "model-00157-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.90.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.90.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.90.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.91.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.91.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.91.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.92.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.92.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.92.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.93.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.93.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.93.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.94.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.94.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.94.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.95.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.95.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.95.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.96.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.96.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.96.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.97.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.97.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.97.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.98.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.98.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.98.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.99.w1.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.99.w2.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.experts.99.w3.weight": "model-00160-of-00195.safetensors", - "model.layers.28.block_sparse_moe.gate.weight": "model-00156-of-00195.safetensors", - "model.layers.28.input_layernorm.weight": "model-00161-of-00195.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00161-of-00195.safetensors", - "model.layers.28.residual_layernorm.weight": "model-00161-of-00195.safetensors", - "model.layers.28.residual_mlp.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.28.residual_mlp.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.28.residual_mlp.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00156-of-00195.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00156-of-00195.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00156-of-00195.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00156-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.10.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.10.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.10.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.100.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.100.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.100.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.101.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.101.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.101.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.102.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.102.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.102.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.103.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.103.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.103.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.104.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.104.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.104.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.105.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.105.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.105.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.106.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.106.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.106.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.107.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.107.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.107.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.108.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.108.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.108.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.109.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.109.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.109.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.11.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.11.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.11.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.110.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.110.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.110.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.111.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.111.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.111.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.112.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.112.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.112.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.113.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.113.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.113.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.114.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.114.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.114.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.115.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.115.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.115.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.116.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.116.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.116.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.117.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.117.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.117.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.118.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.118.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.118.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.119.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.119.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.119.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.12.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.12.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.12.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.120.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.120.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.120.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.121.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.121.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.121.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.122.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.122.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.122.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.123.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.123.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.123.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.124.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.124.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.124.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.125.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.125.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.125.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.126.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.126.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.126.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.127.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.127.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.127.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.13.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.13.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.13.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.14.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.14.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.14.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.15.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.15.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.15.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.16.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.16.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.16.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.17.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.17.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.17.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.18.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.18.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.18.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.19.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.19.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.19.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.20.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.20.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.20.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.21.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.21.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.21.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.22.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.22.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.22.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.23.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.23.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.23.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.24.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.24.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.24.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.25.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.25.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.25.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.26.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.26.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.26.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.27.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.27.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.27.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.28.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.28.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.28.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.29.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.29.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.29.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.30.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.30.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.30.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.31.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.31.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.31.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.32.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.32.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.32.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.33.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.33.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.33.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.34.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.34.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.34.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.35.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.35.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.35.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.36.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.36.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.36.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.37.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.37.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.37.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.38.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.38.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.38.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.39.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.39.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.39.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.40.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.40.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.40.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.41.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.41.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.41.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.42.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.42.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.42.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.43.w1.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.43.w2.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.43.w3.weight": "model-00163-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.44.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.44.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.44.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.45.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.45.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.45.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.46.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.46.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.46.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.47.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.47.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.47.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.48.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.48.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.48.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.49.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.49.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.49.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.50.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.50.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.50.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.51.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.51.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.51.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.52.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.52.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.52.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.53.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.53.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.53.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.54.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.54.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.54.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.55.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.55.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.55.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.56.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.56.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.56.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.57.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.57.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.57.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.58.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.58.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.58.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.59.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.59.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.59.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.60.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.60.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.60.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.61.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.61.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.61.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.62.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.62.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.62.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.63.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.63.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.63.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.64.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.64.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.64.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.65.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.65.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.65.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.66.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.66.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.66.w3.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.67.w1.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.67.w2.weight": "model-00164-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.67.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.68.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.68.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.68.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.69.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.69.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.69.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.70.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.70.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.70.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.71.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.71.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.71.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.72.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.72.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.72.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.73.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.73.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.73.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.74.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.74.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.74.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.75.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.75.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.75.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.76.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.76.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.76.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.77.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.77.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.77.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.78.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.78.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.78.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.79.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.79.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.79.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.8.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.8.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.8.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.80.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.80.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.80.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.81.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.81.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.81.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.82.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.82.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.82.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.83.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.83.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.83.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.84.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.84.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.84.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.85.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.85.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.85.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.86.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.86.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.86.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.87.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.87.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.87.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.88.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.88.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.88.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.89.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.89.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.89.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.9.w1.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.9.w2.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.9.w3.weight": "model-00162-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.90.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.90.w2.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.90.w3.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.91.w1.weight": "model-00165-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.91.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.91.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.92.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.92.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.92.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.93.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.93.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.93.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.94.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.94.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.94.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.95.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.95.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.95.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.96.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.96.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.96.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.97.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.97.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.97.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.98.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.98.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.98.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.99.w1.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.99.w2.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.experts.99.w3.weight": "model-00166-of-00195.safetensors", - "model.layers.29.block_sparse_moe.gate.weight": "model-00162-of-00195.safetensors", - "model.layers.29.input_layernorm.weight": "model-00167-of-00195.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00167-of-00195.safetensors", - "model.layers.29.residual_layernorm.weight": "model-00167-of-00195.safetensors", - "model.layers.29.residual_mlp.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.29.residual_mlp.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.29.residual_mlp.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00162-of-00195.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00162-of-00195.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00162-of-00195.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00162-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.10.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.10.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.10.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.100.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.100.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.100.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.101.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.101.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.101.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.102.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.102.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.102.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.103.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.103.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.103.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.104.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.104.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.104.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.105.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.105.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.105.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.106.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.106.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.106.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.107.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.107.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.107.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.108.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.108.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.108.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.109.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.109.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.109.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.11.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.11.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.11.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.110.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.110.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.110.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.111.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.111.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.111.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.112.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.112.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.112.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.113.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.113.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.113.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.114.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.114.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.114.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.115.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.115.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.115.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.116.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.116.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.116.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.117.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.117.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.117.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.118.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.118.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.118.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.119.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.119.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.119.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.12.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.12.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.12.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.120.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.120.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.120.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.121.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.121.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.121.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.122.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.122.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.122.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.123.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.123.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.123.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.124.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.124.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.124.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.125.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.125.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.125.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.126.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.126.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.126.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.127.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.127.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.127.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.13.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.13.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.13.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.14.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.14.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.14.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.15.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.15.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.15.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.16.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.16.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.16.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.17.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.17.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.17.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.18.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.18.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.18.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.19.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.19.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.19.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.20.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.20.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.20.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.21.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.21.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.21.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.22.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.22.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.22.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.23.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.23.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.23.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.24.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.24.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.24.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.25.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.25.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.25.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.26.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.26.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.26.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.27.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.27.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.27.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.28.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.28.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.28.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.29.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.29.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.29.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.30.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.30.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.30.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.31.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.31.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.31.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.32.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.32.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.32.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.33.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.33.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.33.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.34.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.34.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.34.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.35.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.35.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.35.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.36.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.36.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.36.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.37.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.37.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.37.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.38.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.38.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.38.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.39.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.39.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.39.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.40.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.40.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.40.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.41.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.41.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.41.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.42.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.42.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.42.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.43.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.43.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.43.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.44.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.44.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.44.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.45.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.45.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.45.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.46.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.46.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.46.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.47.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.47.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.47.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.48.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.48.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.48.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.49.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.49.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.49.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.50.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.50.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.50.w3.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.51.w1.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.51.w2.weight": "model-00019-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.51.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.52.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.52.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.52.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.53.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.53.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.53.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.54.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.54.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.54.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.55.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.55.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.55.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.56.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.56.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.56.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.57.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.57.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.57.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.58.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.58.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.58.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.59.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.59.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.59.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.60.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.60.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.60.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.61.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.61.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.61.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.62.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.62.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.62.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.63.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.63.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.63.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.64.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.64.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.64.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.65.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.65.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.65.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.66.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.66.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.66.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.67.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.67.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.67.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.68.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.68.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.68.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.69.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.69.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.69.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.70.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.70.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.70.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.71.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.71.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.71.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.72.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.72.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.72.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.73.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.73.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.73.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.74.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.74.w2.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.74.w3.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.75.w1.weight": "model-00020-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.75.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.75.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.76.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.76.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.76.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.77.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.77.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.77.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.78.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.78.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.78.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.79.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.79.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.79.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.8.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.8.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.8.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.80.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.80.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.80.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.81.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.81.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.81.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.82.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.82.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.82.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.83.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.83.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.83.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.84.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.84.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.84.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.85.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.85.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.85.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.86.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.86.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.86.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.87.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.87.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.87.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.88.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.88.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.88.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.89.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.89.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.89.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.9.w1.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.9.w2.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.9.w3.weight": "model-00018-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.90.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.90.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.90.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.91.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.91.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.91.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.92.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.92.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.92.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.93.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.93.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.93.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.94.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.94.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.94.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.95.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.95.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.95.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.96.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.96.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.96.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.97.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.97.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.97.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.98.w1.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.98.w2.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.98.w3.weight": "model-00021-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.99.w1.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.99.w2.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.experts.99.w3.weight": "model-00022-of-00195.safetensors", - "model.layers.3.block_sparse_moe.gate.weight": "model-00017-of-00195.safetensors", - "model.layers.3.input_layernorm.weight": "model-00023-of-00195.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00023-of-00195.safetensors", - "model.layers.3.residual_layernorm.weight": "model-00023-of-00195.safetensors", - "model.layers.3.residual_mlp.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.3.residual_mlp.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.3.residual_mlp.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00017-of-00195.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00017-of-00195.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00017-of-00195.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00017-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.10.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.10.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.10.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.100.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.100.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.100.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.101.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.101.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.101.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.102.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.102.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.102.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.103.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.103.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.103.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.104.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.104.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.104.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.105.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.105.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.105.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.106.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.106.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.106.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.107.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.107.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.107.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.108.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.108.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.108.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.109.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.109.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.109.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.11.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.11.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.11.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.110.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.110.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.110.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.111.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.111.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.111.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.112.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.112.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.112.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.113.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.113.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.113.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.114.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.114.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.114.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.115.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.115.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.115.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.116.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.116.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.116.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.117.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.117.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.117.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.118.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.118.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.118.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.119.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.119.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.119.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.12.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.12.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.12.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.120.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.120.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.120.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.121.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.121.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.121.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.122.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.122.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.122.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.123.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.123.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.123.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.124.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.124.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.124.w3.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.125.w1.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.125.w2.weight": "model-00172-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.125.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.126.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.126.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.126.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.127.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.127.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.127.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.13.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.13.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.13.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.14.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.14.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.14.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.15.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.15.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.15.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.16.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.16.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.16.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.17.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.17.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.17.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.18.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.18.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.18.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.19.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.19.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.19.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.20.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.20.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.20.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.21.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.21.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.21.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.22.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.22.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.22.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.23.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.23.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.23.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.24.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.24.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.24.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.25.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.25.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.25.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.26.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.26.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.26.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.27.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.27.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.27.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.28.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.28.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.28.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.29.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.29.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.29.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.30.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.30.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.30.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.31.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.31.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.31.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.32.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.32.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.32.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.33.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.33.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.33.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.34.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.34.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.34.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.35.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.35.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.35.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.36.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.36.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.36.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.37.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.37.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.37.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.38.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.38.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.38.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.39.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.39.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.39.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.40.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.40.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.40.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.41.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.41.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.41.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.42.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.42.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.42.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.43.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.43.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.43.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.44.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.44.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.44.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.45.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.45.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.45.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.46.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.46.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.46.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.47.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.47.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.47.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.48.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.48.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.48.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.49.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.49.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.49.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.50.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.50.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.50.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.51.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.51.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.51.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.52.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.52.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.52.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.53.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.53.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.53.w3.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.54.w1.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.54.w2.weight": "model-00169-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.54.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.55.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.55.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.55.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.56.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.56.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.56.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.57.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.57.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.57.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.58.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.58.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.58.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.59.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.59.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.59.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.60.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.60.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.60.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.61.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.61.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.61.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.62.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.62.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.62.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.63.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.63.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.63.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.64.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.64.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.64.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.65.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.65.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.65.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.66.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.66.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.66.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.67.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.67.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.67.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.68.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.68.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.68.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.69.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.69.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.69.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00167-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.70.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.70.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.70.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.71.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.71.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.71.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.72.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.72.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.72.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.73.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.73.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.73.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.74.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.74.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.74.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.75.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.75.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.75.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.76.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.76.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.76.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.77.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.77.w2.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.77.w3.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.78.w1.weight": "model-00170-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.78.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.78.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.79.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.79.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.79.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.8.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.8.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.8.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.80.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.80.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.80.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.81.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.81.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.81.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.82.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.82.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.82.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.83.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.83.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.83.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.84.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.84.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.84.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.85.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.85.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.85.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.86.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.86.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.86.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.87.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.87.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.87.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.88.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.88.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.88.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.89.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.89.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.89.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.9.w1.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.9.w2.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.9.w3.weight": "model-00168-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.90.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.90.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.90.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.91.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.91.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.91.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.92.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.92.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.92.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.93.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.93.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.93.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.94.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.94.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.94.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.95.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.95.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.95.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.96.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.96.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.96.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.97.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.97.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.97.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.98.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.98.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.98.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.99.w1.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.99.w2.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.experts.99.w3.weight": "model-00171-of-00195.safetensors", - "model.layers.30.block_sparse_moe.gate.weight": "model-00167-of-00195.safetensors", - "model.layers.30.input_layernorm.weight": "model-00173-of-00195.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00173-of-00195.safetensors", - "model.layers.30.residual_layernorm.weight": "model-00173-of-00195.safetensors", - "model.layers.30.residual_mlp.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.30.residual_mlp.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.30.residual_mlp.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00167-of-00195.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00167-of-00195.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00167-of-00195.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00167-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.10.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.10.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.10.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.100.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.100.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.100.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.101.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.101.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.101.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.102.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.102.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.102.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.103.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.103.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.103.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.104.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.104.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.104.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.105.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.105.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.105.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.106.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.106.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.106.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.107.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.107.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.107.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.108.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.108.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.108.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.109.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.109.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.109.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.11.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.11.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.11.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.110.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.110.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.110.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.111.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.111.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.111.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.112.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.112.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.112.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.113.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.113.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.113.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.114.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.114.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.114.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.115.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.115.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.115.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.116.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.116.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.116.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.117.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.117.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.117.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.118.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.118.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.118.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.119.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.119.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.119.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.12.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.12.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.12.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.120.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.120.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.120.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.121.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.121.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.121.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.122.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.122.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.122.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.123.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.123.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.123.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.124.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.124.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.124.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.125.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.125.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.125.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.126.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.126.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.126.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.127.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.127.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.127.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.13.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.13.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.13.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.14.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.14.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.14.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.15.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.15.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.15.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.16.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.16.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.16.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.17.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.17.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.17.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.18.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.18.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.18.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.19.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.19.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.19.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.20.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.20.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.20.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.21.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.21.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.21.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.22.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.22.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.22.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.23.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.23.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.23.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.24.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.24.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.24.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.25.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.25.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.25.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.26.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.26.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.26.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.27.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.27.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.27.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.28.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.28.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.28.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.29.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.29.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.29.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.30.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.30.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.30.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.31.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.31.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.31.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.32.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.32.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.32.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.33.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.33.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.33.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.34.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.34.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.34.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.35.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.35.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.35.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.36.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.36.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.36.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.37.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.37.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.37.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.38.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.38.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.38.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.39.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.39.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.39.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.40.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.40.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.40.w3.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.41.w1.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.41.w2.weight": "model-00174-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.41.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.42.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.42.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.42.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.43.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.43.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.43.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.44.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.44.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.44.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.45.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.45.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.45.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.46.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.46.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.46.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.47.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.47.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.47.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.48.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.48.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.48.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.49.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.49.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.49.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.50.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.50.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.50.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.51.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.51.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.51.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.52.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.52.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.52.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.53.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.53.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.53.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.54.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.54.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.54.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.55.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.55.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.55.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.56.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.56.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.56.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.57.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.57.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.57.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.58.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.58.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.58.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.59.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.59.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.59.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.60.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.60.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.60.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.61.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.61.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.61.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.62.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.62.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.62.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.63.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.63.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.63.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.64.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.64.w2.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.64.w3.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.65.w1.weight": "model-00175-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.65.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.65.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.66.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.66.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.66.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.67.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.67.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.67.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.68.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.68.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.68.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.69.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.69.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.69.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.70.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.70.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.70.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.71.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.71.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.71.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.72.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.72.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.72.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.73.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.73.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.73.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.74.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.74.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.74.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.75.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.75.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.75.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.76.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.76.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.76.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.77.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.77.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.77.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.78.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.78.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.78.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.79.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.79.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.79.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.8.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.8.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.8.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.80.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.80.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.80.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.81.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.81.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.81.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.82.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.82.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.82.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.83.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.83.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.83.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.84.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.84.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.84.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.85.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.85.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.85.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.86.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.86.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.86.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.87.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.87.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.87.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.88.w1.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.88.w2.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.88.w3.weight": "model-00176-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.89.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.89.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.89.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.9.w1.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.9.w2.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.9.w3.weight": "model-00173-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.90.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.90.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.90.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.91.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.91.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.91.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.92.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.92.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.92.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.93.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.93.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.93.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.94.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.94.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.94.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.95.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.95.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.95.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.96.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.96.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.96.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.97.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.97.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.97.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.98.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.98.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.98.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.99.w1.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.99.w2.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.experts.99.w3.weight": "model-00177-of-00195.safetensors", - "model.layers.31.block_sparse_moe.gate.weight": "model-00173-of-00195.safetensors", - "model.layers.31.input_layernorm.weight": "model-00178-of-00195.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00178-of-00195.safetensors", - "model.layers.31.residual_layernorm.weight": "model-00178-of-00195.safetensors", - "model.layers.31.residual_mlp.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.31.residual_mlp.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.31.residual_mlp.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00173-of-00195.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00173-of-00195.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00173-of-00195.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00173-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.10.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.10.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.10.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.100.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.100.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.100.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.101.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.101.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.101.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.102.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.102.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.102.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.103.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.103.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.103.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.104.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.104.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.104.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.105.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.105.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.105.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.106.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.106.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.106.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.107.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.107.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.107.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.108.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.108.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.108.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.109.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.109.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.109.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.11.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.11.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.11.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.110.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.110.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.110.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.111.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.111.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.111.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.112.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.112.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.112.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.113.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.113.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.113.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.114.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.114.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.114.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.115.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.115.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.115.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.116.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.116.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.116.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.117.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.117.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.117.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.118.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.118.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.118.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.119.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.119.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.119.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.12.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.12.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.12.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.120.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.120.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.120.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.121.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.121.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.121.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.122.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.122.w2.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.122.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.123.w1.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.123.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.123.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.124.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.124.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.124.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.125.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.125.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.125.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.126.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.126.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.126.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.127.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.127.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.127.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.13.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.13.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.13.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.14.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.14.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.14.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.15.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.15.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.15.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.16.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.16.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.16.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.17.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.17.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.17.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.18.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.18.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.18.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.19.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.19.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.19.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.20.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.20.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.20.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.21.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.21.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.21.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.22.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.22.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.22.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.23.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.23.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.23.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.24.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.24.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.24.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.25.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.25.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.25.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.26.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.26.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.26.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.27.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.27.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.27.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.28.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.28.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.28.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.29.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.29.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.29.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.30.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.30.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.30.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.31.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.31.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.31.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.32.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.32.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.32.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.33.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.33.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.33.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.34.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.34.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.34.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.35.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.35.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.35.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.36.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.36.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.36.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.37.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.37.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.37.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.38.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.38.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.38.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.39.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.39.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.39.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00178-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.40.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.40.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.40.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.41.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.41.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.41.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.42.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.42.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.42.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.43.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.43.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.43.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.44.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.44.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.44.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.45.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.45.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.45.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.46.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.46.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.46.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.47.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.47.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.47.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.48.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.48.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.48.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.49.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.49.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.49.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.50.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.50.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.50.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.51.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.51.w2.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.51.w3.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.52.w1.weight": "model-00180-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.52.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.52.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.53.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.53.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.53.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.54.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.54.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.54.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.55.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.55.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.55.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.56.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.56.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.56.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.57.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.57.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.57.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.58.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.58.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.58.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.59.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.59.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.59.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.60.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.60.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.60.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.61.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.61.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.61.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.62.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.62.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.62.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.63.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.63.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.63.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.64.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.64.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.64.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.65.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.65.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.65.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.66.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.66.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.66.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.67.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.67.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.67.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.68.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.68.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.68.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.69.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.69.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.69.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.70.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.70.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.70.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.71.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.71.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.71.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.72.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.72.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.72.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.73.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.73.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.73.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.74.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.74.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.74.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.75.w1.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.75.w2.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.75.w3.weight": "model-00181-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.76.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.76.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.76.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.77.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.77.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.77.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.78.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.78.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.78.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.79.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.79.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.79.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.8.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.8.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.8.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.80.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.80.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.80.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.81.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.81.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.81.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.82.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.82.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.82.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.83.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.83.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.83.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.84.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.84.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.84.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.85.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.85.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.85.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.86.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.86.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.86.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.87.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.87.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.87.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.88.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.88.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.88.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.89.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.89.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.89.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.9.w1.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.9.w2.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.9.w3.weight": "model-00179-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.90.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.90.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.90.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.91.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.91.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.91.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.92.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.92.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.92.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.93.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.93.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.93.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.94.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.94.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.94.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.95.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.95.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.95.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.96.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.96.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.96.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.97.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.97.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.97.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.98.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.98.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.98.w3.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.99.w1.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.99.w2.weight": "model-00182-of-00195.safetensors", - "model.layers.32.block_sparse_moe.experts.99.w3.weight": "model-00183-of-00195.safetensors", - "model.layers.32.block_sparse_moe.gate.weight": "model-00178-of-00195.safetensors", - "model.layers.32.input_layernorm.weight": "model-00184-of-00195.safetensors", - "model.layers.32.post_attention_layernorm.weight": "model-00184-of-00195.safetensors", - "model.layers.32.residual_layernorm.weight": "model-00184-of-00195.safetensors", - "model.layers.32.residual_mlp.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.32.residual_mlp.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.32.residual_mlp.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.32.self_attn.k_proj.weight": "model-00178-of-00195.safetensors", - "model.layers.32.self_attn.o_proj.weight": "model-00178-of-00195.safetensors", - "model.layers.32.self_attn.q_proj.weight": "model-00178-of-00195.safetensors", - "model.layers.32.self_attn.v_proj.weight": "model-00178-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.10.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.10.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.10.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.100.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.100.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.100.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.101.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.101.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.101.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.102.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.102.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.102.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.103.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.103.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.103.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.104.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.104.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.104.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.105.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.105.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.105.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.106.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.106.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.106.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.107.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.107.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.107.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.108.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.108.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.108.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.109.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.109.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.109.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.11.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.11.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.11.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.110.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.110.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.110.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.111.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.111.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.111.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.112.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.112.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.112.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.113.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.113.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.113.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.114.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.114.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.114.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.115.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.115.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.115.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.116.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.116.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.116.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.117.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.117.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.117.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.118.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.118.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.118.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.119.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.119.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.119.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.12.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.12.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.12.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.120.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.120.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.120.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.121.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.121.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.121.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.122.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.122.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.122.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.123.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.123.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.123.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.124.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.124.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.124.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.125.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.125.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.125.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.126.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.126.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.126.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.127.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.127.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.127.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.13.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.13.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.13.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.14.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.14.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.14.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.15.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.15.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.15.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.16.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.16.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.16.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.17.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.17.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.17.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.18.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.18.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.18.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.19.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.19.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.19.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.20.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.20.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.20.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.21.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.21.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.21.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.22.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.22.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.22.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.23.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.23.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.23.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.24.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.24.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.24.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.25.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.25.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.25.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.26.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.26.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.26.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.27.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.27.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.27.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.28.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.28.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.28.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.29.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.29.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.29.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.30.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.30.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.30.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.31.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.31.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.31.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.32.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.32.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.32.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.33.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.33.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.33.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.34.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.34.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.34.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.35.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.35.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.35.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.36.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.36.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.36.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.37.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.37.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.37.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.38.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.38.w2.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.38.w3.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.39.w1.weight": "model-00185-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.39.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.39.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.40.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.40.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.40.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.41.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.41.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.41.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.42.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.42.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.42.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.43.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.43.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.43.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.44.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.44.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.44.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.45.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.45.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.45.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.46.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.46.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.46.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.47.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.47.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.47.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.48.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.48.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.48.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.49.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.49.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.49.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.50.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.50.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.50.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.51.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.51.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.51.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.52.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.52.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.52.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.53.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.53.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.53.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.54.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.54.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.54.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.55.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.55.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.55.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.56.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.56.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.56.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.57.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.57.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.57.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.58.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.58.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.58.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.59.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.59.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.59.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.60.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.60.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.60.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.61.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.61.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.61.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.62.w1.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.62.w2.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.62.w3.weight": "model-00186-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.63.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.63.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.63.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.64.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.64.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.64.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.65.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.65.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.65.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.66.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.66.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.66.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.67.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.67.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.67.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.68.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.68.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.68.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.69.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.69.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.69.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.70.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.70.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.70.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.71.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.71.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.71.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.72.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.72.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.72.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.73.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.73.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.73.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.74.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.74.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.74.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.75.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.75.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.75.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.76.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.76.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.76.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.77.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.77.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.77.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.78.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.78.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.78.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.79.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.79.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.79.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.8.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.8.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.8.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.80.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.80.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.80.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.81.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.81.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.81.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.82.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.82.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.82.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.83.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.83.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.83.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.84.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.84.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.84.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.85.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.85.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.85.w3.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.86.w1.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.86.w2.weight": "model-00187-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.86.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.87.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.87.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.87.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.88.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.88.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.88.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.89.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.89.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.89.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.9.w1.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.9.w2.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.9.w3.weight": "model-00184-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.90.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.90.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.90.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.91.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.91.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.91.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.92.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.92.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.92.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.93.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.93.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.93.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.94.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.94.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.94.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.95.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.95.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.95.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.96.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.96.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.96.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.97.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.97.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.97.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.98.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.98.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.98.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.99.w1.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.99.w2.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.experts.99.w3.weight": "model-00188-of-00195.safetensors", - "model.layers.33.block_sparse_moe.gate.weight": "model-00184-of-00195.safetensors", - "model.layers.33.input_layernorm.weight": "model-00189-of-00195.safetensors", - "model.layers.33.post_attention_layernorm.weight": "model-00189-of-00195.safetensors", - "model.layers.33.residual_layernorm.weight": "model-00189-of-00195.safetensors", - "model.layers.33.residual_mlp.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.33.residual_mlp.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.33.residual_mlp.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.33.self_attn.k_proj.weight": "model-00184-of-00195.safetensors", - "model.layers.33.self_attn.o_proj.weight": "model-00184-of-00195.safetensors", - "model.layers.33.self_attn.q_proj.weight": "model-00184-of-00195.safetensors", - "model.layers.33.self_attn.v_proj.weight": "model-00184-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00189-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.10.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.10.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.10.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.100.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.100.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.100.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.101.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.101.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.101.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.102.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.102.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.102.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.103.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.103.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.103.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.104.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.104.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.104.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.105.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.105.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.105.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.106.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.106.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.106.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.107.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.107.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.107.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.108.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.108.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.108.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.109.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.109.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.109.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.11.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.11.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.11.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.110.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.110.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.110.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.111.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.111.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.111.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.112.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.112.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.112.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.113.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.113.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.113.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.114.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.114.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.114.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.115.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.115.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.115.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.116.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.116.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.116.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.117.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.117.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.117.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.118.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.118.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.118.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.119.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.119.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.119.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.12.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.12.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.12.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.120.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.120.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.120.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.121.w1.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.121.w2.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.121.w3.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.122.w1.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.122.w2.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.122.w3.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.123.w1.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.123.w2.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.123.w3.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.124.w1.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.124.w2.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.124.w3.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.125.w1.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.125.w2.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.125.w3.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.126.w1.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.126.w2.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.126.w3.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.127.w1.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.127.w2.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.127.w3.weight": "model-00195-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.13.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.13.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.13.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.14.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.14.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.14.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.15.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.15.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.15.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.16.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.16.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.16.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.17.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.17.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.17.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.18.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.18.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.18.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.19.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.19.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.19.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00189-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00189-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.20.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.20.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.20.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.21.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.21.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.21.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.22.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.22.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.22.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.23.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.23.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.23.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.24.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.24.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.24.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.25.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.25.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.25.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.26.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.26.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.26.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.27.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.27.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.27.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.28.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.28.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.28.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.29.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.29.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.29.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.30.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.30.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.30.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.31.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.31.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.31.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.32.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.32.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.32.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.33.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.33.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.33.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.34.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.34.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.34.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.35.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.35.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.35.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.36.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.36.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.36.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.37.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.37.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.37.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.38.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.38.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.38.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.39.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.39.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.39.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.40.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.40.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.40.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.41.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.41.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.41.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.42.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.42.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.42.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.43.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.43.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.43.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.44.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.44.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.44.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.45.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.45.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.45.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.46.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.46.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.46.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.47.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.47.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.47.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.48.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.48.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.48.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.49.w1.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.49.w2.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.49.w3.weight": "model-00191-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.50.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.50.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.50.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.51.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.51.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.51.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.52.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.52.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.52.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.53.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.53.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.53.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.54.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.54.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.54.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.55.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.55.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.55.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.56.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.56.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.56.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.57.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.57.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.57.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.58.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.58.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.58.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.59.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.59.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.59.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.60.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.60.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.60.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.61.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.61.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.61.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.62.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.62.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.62.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.63.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.63.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.63.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.64.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.64.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.64.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.65.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.65.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.65.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.66.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.66.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.66.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.67.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.67.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.67.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.68.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.68.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.68.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.69.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.69.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.69.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.70.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.70.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.70.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.71.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.71.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.71.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.72.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.72.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.72.w3.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.73.w1.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.73.w2.weight": "model-00192-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.73.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.74.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.74.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.74.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.75.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.75.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.75.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.76.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.76.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.76.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.77.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.77.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.77.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.78.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.78.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.78.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.79.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.79.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.79.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.8.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.8.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.8.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.80.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.80.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.80.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.81.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.81.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.81.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.82.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.82.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.82.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.83.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.83.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.83.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.84.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.84.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.84.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.85.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.85.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.85.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.86.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.86.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.86.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.87.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.87.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.87.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.88.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.88.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.88.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.89.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.89.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.89.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.9.w1.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.9.w2.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.9.w3.weight": "model-00190-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.90.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.90.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.90.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.91.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.91.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.91.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.92.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.92.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.92.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.93.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.93.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.93.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.94.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.94.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.94.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.95.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.95.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.95.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.96.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.96.w2.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.96.w3.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.97.w1.weight": "model-00193-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.97.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.97.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.98.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.98.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.98.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.99.w1.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.99.w2.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.experts.99.w3.weight": "model-00194-of-00195.safetensors", - "model.layers.34.block_sparse_moe.gate.weight": "model-00189-of-00195.safetensors", - "model.layers.34.input_layernorm.weight": "model-00195-of-00195.safetensors", - "model.layers.34.post_attention_layernorm.weight": "model-00195-of-00195.safetensors", - "model.layers.34.residual_layernorm.weight": "model-00195-of-00195.safetensors", - "model.layers.34.residual_mlp.w1.weight": "model-00195-of-00195.safetensors", - "model.layers.34.residual_mlp.w2.weight": "model-00195-of-00195.safetensors", - "model.layers.34.residual_mlp.w3.weight": "model-00195-of-00195.safetensors", - "model.layers.34.self_attn.k_proj.weight": "model-00189-of-00195.safetensors", - "model.layers.34.self_attn.o_proj.weight": "model-00189-of-00195.safetensors", - "model.layers.34.self_attn.q_proj.weight": "model-00189-of-00195.safetensors", - "model.layers.34.self_attn.v_proj.weight": "model-00189-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.10.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.10.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.10.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.100.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.100.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.100.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.101.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.101.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.101.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.102.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.102.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.102.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.103.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.103.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.103.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.104.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.104.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.104.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.105.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.105.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.105.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.106.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.106.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.106.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.107.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.107.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.107.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.108.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.108.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.108.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.109.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.109.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.109.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.11.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.11.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.11.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.110.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.110.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.110.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.111.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.111.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.111.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.112.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.112.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.112.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.113.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.113.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.113.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.114.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.114.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.114.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.115.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.115.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.115.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.116.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.116.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.116.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.117.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.117.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.117.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.118.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.118.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.118.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.119.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.119.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.119.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.12.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.12.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.12.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.120.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.120.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.120.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.121.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.121.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.121.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.122.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.122.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.122.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.123.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.123.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.123.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.124.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.124.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.124.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.125.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.125.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.125.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.126.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.126.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.126.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.127.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.127.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.127.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.13.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.13.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.13.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.14.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.14.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.14.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.15.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.15.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.15.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.16.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.16.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.16.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.17.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.17.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.17.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.18.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.18.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.18.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.19.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.19.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.19.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.20.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.20.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.20.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.21.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.21.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.21.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.22.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.22.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.22.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.23.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.23.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.23.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.24.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.24.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.24.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.25.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.25.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.25.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.26.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.26.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.26.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.27.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.27.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.27.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.28.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.28.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.28.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.29.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.29.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.29.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.30.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.30.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.30.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.31.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.31.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.31.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.32.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.32.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.32.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.33.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.33.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.33.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.34.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.34.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.34.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.35.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.35.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.35.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.36.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.36.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.36.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.37.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.37.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.37.w3.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.38.w1.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.38.w2.weight": "model-00024-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.38.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.39.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.39.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.39.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.40.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.40.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.40.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.41.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.41.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.41.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.42.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.42.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.42.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.43.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.43.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.43.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.44.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.44.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.44.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.45.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.45.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.45.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.46.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.46.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.46.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.47.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.47.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.47.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.48.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.48.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.48.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.49.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.49.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.49.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.50.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.50.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.50.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.51.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.51.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.51.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.52.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.52.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.52.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.53.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.53.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.53.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.54.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.54.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.54.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.55.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.55.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.55.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.56.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.56.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.56.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.57.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.57.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.57.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.58.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.58.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.58.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.59.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.59.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.59.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.60.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.60.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.60.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.61.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.61.w2.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.61.w3.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.62.w1.weight": "model-00025-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.62.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.62.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.63.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.63.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.63.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.64.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.64.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.64.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.65.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.65.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.65.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.66.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.66.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.66.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.67.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.67.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.67.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.68.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.68.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.68.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.69.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.69.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.69.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.70.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.70.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.70.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.71.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.71.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.71.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.72.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.72.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.72.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.73.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.73.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.73.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.74.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.74.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.74.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.75.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.75.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.75.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.76.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.76.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.76.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.77.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.77.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.77.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.78.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.78.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.78.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.79.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.79.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.79.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.8.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.8.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.8.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.80.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.80.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.80.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.81.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.81.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.81.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.82.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.82.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.82.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.83.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.83.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.83.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.84.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.84.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.84.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.85.w1.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.85.w2.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.85.w3.weight": "model-00026-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.86.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.86.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.86.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.87.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.87.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.87.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.88.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.88.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.88.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.89.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.89.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.89.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.9.w1.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.9.w2.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.9.w3.weight": "model-00023-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.90.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.90.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.90.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.91.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.91.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.91.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.92.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.92.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.92.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.93.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.93.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.93.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.94.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.94.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.94.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.95.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.95.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.95.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.96.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.96.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.96.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.97.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.97.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.97.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.98.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.98.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.98.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.99.w1.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.99.w2.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.experts.99.w3.weight": "model-00027-of-00195.safetensors", - "model.layers.4.block_sparse_moe.gate.weight": "model-00023-of-00195.safetensors", - "model.layers.4.input_layernorm.weight": "model-00028-of-00195.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00028-of-00195.safetensors", - "model.layers.4.residual_layernorm.weight": "model-00028-of-00195.safetensors", - "model.layers.4.residual_mlp.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.4.residual_mlp.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.4.residual_mlp.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00023-of-00195.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00023-of-00195.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00023-of-00195.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00023-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.10.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.10.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.10.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.100.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.100.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.100.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.101.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.101.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.101.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.102.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.102.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.102.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.103.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.103.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.103.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.104.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.104.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.104.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.105.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.105.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.105.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.106.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.106.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.106.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.107.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.107.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.107.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.108.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.108.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.108.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.109.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.109.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.109.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.11.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.11.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.11.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.110.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.110.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.110.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.111.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.111.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.111.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.112.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.112.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.112.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.113.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.113.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.113.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.114.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.114.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.114.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.115.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.115.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.115.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.116.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.116.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.116.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.117.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.117.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.117.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.118.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.118.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.118.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.119.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.119.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.119.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.12.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.12.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.12.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.120.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.120.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.120.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.121.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.121.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.121.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.122.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.122.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.122.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.123.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.123.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.123.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.124.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.124.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.124.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.125.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.125.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.125.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.126.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.126.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.126.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.127.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.127.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.127.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.13.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.13.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.13.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.14.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.14.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.14.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.15.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.15.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.15.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.16.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.16.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.16.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.17.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.17.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.17.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.18.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.18.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.18.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.19.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.19.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.19.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.20.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.20.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.20.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.21.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.21.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.21.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.22.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.22.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.22.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.23.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.23.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.23.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.24.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.24.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.24.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.25.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.25.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.25.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.26.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.26.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.26.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.27.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.27.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.27.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.28.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.28.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.28.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.29.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.29.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.29.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.30.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.30.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.30.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.31.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.31.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.31.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.32.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.32.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.32.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.33.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.33.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.33.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.34.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.34.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.34.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.35.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.35.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.35.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.36.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.36.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.36.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.37.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.37.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.37.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.38.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.38.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.38.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.39.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.39.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.39.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.40.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.40.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.40.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.41.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.41.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.41.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.42.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.42.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.42.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.43.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.43.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.43.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.44.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.44.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.44.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.45.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.45.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.45.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.46.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.46.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.46.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.47.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.47.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.47.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.48.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.48.w2.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.48.w3.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.49.w1.weight": "model-00030-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.49.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.49.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.50.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.50.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.50.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.51.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.51.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.51.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.52.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.52.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.52.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.53.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.53.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.53.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.54.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.54.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.54.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.55.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.55.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.55.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.56.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.56.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.56.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.57.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.57.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.57.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.58.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.58.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.58.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.59.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.59.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.59.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.60.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.60.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.60.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.61.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.61.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.61.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.62.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.62.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.62.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.63.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.63.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.63.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.64.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.64.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.64.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.65.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.65.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.65.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.66.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.66.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.66.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.67.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.67.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.67.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.68.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.68.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.68.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.69.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.69.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.69.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.70.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.70.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.70.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.71.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.71.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.71.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.72.w1.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.72.w2.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.72.w3.weight": "model-00031-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.73.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.73.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.73.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.74.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.74.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.74.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.75.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.75.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.75.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.76.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.76.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.76.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.77.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.77.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.77.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.78.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.78.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.78.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.79.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.79.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.79.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.8.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.8.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.8.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.80.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.80.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.80.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.81.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.81.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.81.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.82.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.82.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.82.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.83.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.83.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.83.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.84.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.84.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.84.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.85.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.85.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.85.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.86.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.86.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.86.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.87.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.87.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.87.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.88.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.88.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.88.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.89.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.89.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.89.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.9.w1.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.9.w2.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.9.w3.weight": "model-00029-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.90.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.90.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.90.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.91.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.91.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.91.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.92.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.92.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.92.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.93.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.93.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.93.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.94.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.94.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.94.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.95.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.95.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.95.w3.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.96.w1.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.96.w2.weight": "model-00032-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.96.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.97.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.97.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.97.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.98.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.98.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.98.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.99.w1.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.99.w2.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.experts.99.w3.weight": "model-00033-of-00195.safetensors", - "model.layers.5.block_sparse_moe.gate.weight": "model-00028-of-00195.safetensors", - "model.layers.5.input_layernorm.weight": "model-00034-of-00195.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00034-of-00195.safetensors", - "model.layers.5.residual_layernorm.weight": "model-00034-of-00195.safetensors", - "model.layers.5.residual_mlp.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.5.residual_mlp.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.5.residual_mlp.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00028-of-00195.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00028-of-00195.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00028-of-00195.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00028-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.10.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.10.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.10.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.100.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.100.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.100.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.101.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.101.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.101.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.102.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.102.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.102.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.103.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.103.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.103.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.104.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.104.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.104.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.105.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.105.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.105.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.106.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.106.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.106.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.107.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.107.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.107.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.108.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.108.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.108.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.109.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.109.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.109.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.11.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.11.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.11.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.110.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.110.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.110.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.111.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.111.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.111.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.112.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.112.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.112.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.113.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.113.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.113.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.114.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.114.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.114.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.115.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.115.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.115.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.116.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.116.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.116.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.117.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.117.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.117.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.118.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.118.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.118.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.119.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.119.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.119.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.12.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.12.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.12.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.120.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.120.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.120.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.121.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.121.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.121.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.122.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.122.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.122.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.123.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.123.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.123.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.124.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.124.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.124.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.125.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.125.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.125.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.126.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.126.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.126.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.127.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.127.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.127.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.13.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.13.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.13.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.14.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.14.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.14.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.15.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.15.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.15.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.16.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.16.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.16.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.17.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.17.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.17.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.18.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.18.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.18.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.19.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.19.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.19.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.20.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.20.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.20.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.21.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.21.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.21.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.22.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.22.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.22.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.23.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.23.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.23.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.24.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.24.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.24.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.25.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.25.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.25.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.26.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.26.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.26.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.27.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.27.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.27.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.28.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.28.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.28.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.29.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.29.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.29.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.30.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.30.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.30.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.31.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.31.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.31.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.32.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.32.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.32.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.33.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.33.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.33.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.34.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.34.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.34.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.35.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.35.w2.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.35.w3.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.36.w1.weight": "model-00035-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.36.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.36.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.37.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.37.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.37.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.38.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.38.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.38.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.39.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.39.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.39.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.40.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.40.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.40.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.41.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.41.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.41.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.42.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.42.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.42.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.43.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.43.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.43.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.44.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.44.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.44.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.45.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.45.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.45.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.46.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.46.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.46.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.47.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.47.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.47.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.48.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.48.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.48.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.49.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.49.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.49.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.50.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.50.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.50.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.51.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.51.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.51.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.52.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.52.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.52.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.53.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.53.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.53.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.54.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.54.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.54.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.55.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.55.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.55.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.56.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.56.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.56.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.57.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.57.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.57.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.58.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.58.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.58.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.59.w1.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.59.w2.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.59.w3.weight": "model-00036-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.60.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.60.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.60.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.61.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.61.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.61.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.62.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.62.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.62.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.63.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.63.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.63.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.64.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.64.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.64.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.65.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.65.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.65.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.66.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.66.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.66.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.67.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.67.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.67.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.68.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.68.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.68.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.69.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.69.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.69.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.70.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.70.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.70.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.71.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.71.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.71.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.72.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.72.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.72.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.73.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.73.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.73.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.74.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.74.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.74.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.75.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.75.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.75.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.76.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.76.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.76.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.77.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.77.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.77.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.78.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.78.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.78.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.79.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.79.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.79.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.8.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.8.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.8.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.80.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.80.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.80.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.81.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.81.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.81.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.82.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.82.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.82.w3.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.83.w1.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.83.w2.weight": "model-00037-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.83.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.84.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.84.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.84.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.85.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.85.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.85.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.86.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.86.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.86.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.87.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.87.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.87.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.88.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.88.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.88.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.89.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.89.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.89.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.9.w1.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.9.w2.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.9.w3.weight": "model-00034-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.90.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.90.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.90.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.91.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.91.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.91.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.92.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.92.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.92.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.93.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.93.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.93.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.94.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.94.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.94.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.95.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.95.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.95.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.96.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.96.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.96.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.97.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.97.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.97.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.98.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.98.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.98.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.99.w1.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.99.w2.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.experts.99.w3.weight": "model-00038-of-00195.safetensors", - "model.layers.6.block_sparse_moe.gate.weight": "model-00034-of-00195.safetensors", - "model.layers.6.input_layernorm.weight": "model-00039-of-00195.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00039-of-00195.safetensors", - "model.layers.6.residual_layernorm.weight": "model-00039-of-00195.safetensors", - "model.layers.6.residual_mlp.w1.weight": "model-00039-of-00195.safetensors", - "model.layers.6.residual_mlp.w2.weight": "model-00039-of-00195.safetensors", - "model.layers.6.residual_mlp.w3.weight": "model-00039-of-00195.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00034-of-00195.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00034-of-00195.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00034-of-00195.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00034-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.10.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.10.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.10.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.100.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.100.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.100.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.101.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.101.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.101.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.102.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.102.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.102.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.103.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.103.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.103.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.104.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.104.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.104.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.105.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.105.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.105.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.106.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.106.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.106.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.107.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.107.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.107.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.108.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.108.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.108.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.109.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.109.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.109.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.11.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.11.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.11.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.110.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.110.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.110.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.111.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.111.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.111.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.112.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.112.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.112.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.113.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.113.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.113.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.114.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.114.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.114.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.115.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.115.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.115.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.116.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.116.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.116.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.117.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.117.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.117.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.118.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.118.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.118.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.119.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.119.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.119.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.12.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.12.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.12.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.120.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.120.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.120.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.121.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.121.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.121.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.122.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.122.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.122.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.123.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.123.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.123.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.124.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.124.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.124.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.125.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.125.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.125.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.126.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.126.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.126.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.127.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.127.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.127.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.13.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.13.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.13.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.14.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.14.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.14.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.15.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.15.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.15.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.16.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.16.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.16.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.17.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.17.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.17.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.18.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.18.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.18.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.19.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.19.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.19.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.20.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.20.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.20.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.21.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.21.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.21.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.22.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.22.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.22.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.23.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.23.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.23.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.24.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.24.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.24.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.25.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.25.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.25.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.26.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.26.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.26.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.27.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.27.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.27.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.28.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.28.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.28.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.29.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.29.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.29.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.30.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.30.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.30.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.31.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.31.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.31.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.32.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.32.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.32.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.33.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.33.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.33.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.34.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.34.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.34.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.35.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.35.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.35.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.36.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.36.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.36.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.37.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.37.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.37.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.38.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.38.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.38.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.39.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.39.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.39.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.40.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.40.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.40.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.41.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.41.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.41.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.42.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.42.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.42.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.43.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.43.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.43.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.44.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.44.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.44.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.45.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.45.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.45.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.46.w1.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.46.w2.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.46.w3.weight": "model-00041-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.47.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.47.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.47.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.48.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.48.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.48.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.49.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.49.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.49.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.50.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.50.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.50.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.51.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.51.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.51.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.52.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.52.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.52.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.53.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.53.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.53.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.54.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.54.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.54.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.55.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.55.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.55.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.56.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.56.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.56.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.57.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.57.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.57.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.58.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.58.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.58.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.59.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.59.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.59.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.60.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.60.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.60.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.61.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.61.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.61.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.62.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.62.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.62.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.63.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.63.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.63.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.64.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.64.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.64.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.65.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.65.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.65.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.66.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.66.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.66.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.67.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.67.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.67.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.68.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.68.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.68.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.69.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.69.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.69.w3.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.70.w1.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.70.w2.weight": "model-00042-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.70.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.71.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.71.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.71.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.72.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.72.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.72.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.73.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.73.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.73.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.74.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.74.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.74.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.75.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.75.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.75.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.76.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.76.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.76.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.77.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.77.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.77.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.78.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.78.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.78.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.79.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.79.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.79.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.8.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.8.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.8.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.80.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.80.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.80.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.81.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.81.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.81.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.82.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.82.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.82.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.83.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.83.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.83.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.84.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.84.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.84.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.85.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.85.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.85.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.86.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.86.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.86.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.87.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.87.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.87.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.88.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.88.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.88.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.89.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.89.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.89.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.9.w1.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.9.w2.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.9.w3.weight": "model-00040-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.90.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.90.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.90.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.91.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.91.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.91.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.92.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.92.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.92.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.93.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.93.w2.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.93.w3.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.94.w1.weight": "model-00043-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.94.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.94.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.95.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.95.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.95.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.96.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.96.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.96.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.97.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.97.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.97.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.98.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.98.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.98.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.99.w1.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.99.w2.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.experts.99.w3.weight": "model-00044-of-00195.safetensors", - "model.layers.7.block_sparse_moe.gate.weight": "model-00040-of-00195.safetensors", - "model.layers.7.input_layernorm.weight": "model-00045-of-00195.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00045-of-00195.safetensors", - "model.layers.7.residual_layernorm.weight": "model-00045-of-00195.safetensors", - "model.layers.7.residual_mlp.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.7.residual_mlp.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.7.residual_mlp.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00039-of-00195.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00040-of-00195.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00039-of-00195.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00039-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.10.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.10.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.10.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.100.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.100.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.100.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.101.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.101.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.101.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.102.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.102.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.102.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.103.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.103.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.103.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.104.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.104.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.104.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.105.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.105.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.105.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.106.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.106.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.106.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.107.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.107.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.107.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.108.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.108.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.108.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.109.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.109.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.109.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.11.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.11.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.11.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.110.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.110.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.110.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.111.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.111.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.111.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.112.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.112.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.112.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.113.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.113.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.113.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.114.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.114.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.114.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.115.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.115.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.115.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.116.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.116.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.116.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.117.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.117.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.117.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.118.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.118.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.118.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.119.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.119.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.119.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.12.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.12.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.12.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.120.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.120.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.120.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.121.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.121.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.121.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.122.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.122.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.122.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.123.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.123.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.123.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.124.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.124.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.124.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.125.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.125.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.125.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.126.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.126.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.126.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.127.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.127.w2.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.127.w3.weight": "model-00050-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.13.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.13.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.13.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.14.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.14.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.14.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.15.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.15.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.15.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.16.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.16.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.16.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.17.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.17.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.17.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.18.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.18.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.18.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.19.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.19.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.19.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.20.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.20.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.20.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.21.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.21.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.21.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.22.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.22.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.22.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.23.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.23.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.23.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.24.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.24.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.24.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.25.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.25.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.25.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.26.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.26.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.26.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.27.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.27.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.27.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.28.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.28.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.28.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.29.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.29.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.29.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.30.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.30.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.30.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.31.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.31.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.31.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.32.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.32.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.32.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.33.w1.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.33.w2.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.33.w3.weight": "model-00046-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.34.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.34.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.34.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.35.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.35.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.35.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.36.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.36.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.36.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.37.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.37.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.37.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.38.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.38.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.38.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.39.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.39.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.39.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.40.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.40.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.40.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.41.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.41.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.41.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.42.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.42.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.42.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.43.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.43.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.43.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.44.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.44.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.44.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.45.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.45.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.45.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.46.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.46.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.46.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.47.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.47.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.47.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.48.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.48.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.48.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.49.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.49.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.49.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.50.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.50.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.50.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.51.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.51.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.51.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.52.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.52.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.52.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.53.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.53.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.53.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.54.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.54.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.54.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.55.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.55.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.55.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.56.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.56.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.56.w3.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.57.w1.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.57.w2.weight": "model-00047-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.57.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.58.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.58.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.58.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.59.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.59.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.59.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.60.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.60.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.60.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.61.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.61.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.61.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.62.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.62.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.62.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.63.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.63.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.63.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.64.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.64.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.64.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.65.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.65.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.65.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.66.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.66.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.66.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.67.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.67.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.67.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.68.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.68.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.68.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.69.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.69.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.69.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.70.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.70.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.70.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.71.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.71.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.71.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.72.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.72.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.72.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.73.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.73.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.73.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.74.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.74.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.74.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.75.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.75.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.75.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.76.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.76.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.76.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.77.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.77.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.77.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.78.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.78.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.78.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.79.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.79.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.79.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.8.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.8.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.8.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.80.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.80.w2.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.80.w3.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.81.w1.weight": "model-00048-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.81.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.81.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.82.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.82.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.82.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.83.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.83.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.83.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.84.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.84.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.84.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.85.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.85.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.85.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.86.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.86.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.86.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.87.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.87.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.87.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.88.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.88.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.88.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.89.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.89.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.89.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.9.w1.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.9.w2.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.9.w3.weight": "model-00045-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.90.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.90.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.90.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.91.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.91.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.91.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.92.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.92.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.92.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.93.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.93.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.93.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.94.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.94.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.94.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.95.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.95.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.95.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.96.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.96.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.96.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.97.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.97.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.97.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.98.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.98.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.98.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.99.w1.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.99.w2.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.experts.99.w3.weight": "model-00049-of-00195.safetensors", - "model.layers.8.block_sparse_moe.gate.weight": "model-00045-of-00195.safetensors", - "model.layers.8.input_layernorm.weight": "model-00050-of-00195.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00050-of-00195.safetensors", - "model.layers.8.residual_layernorm.weight": "model-00050-of-00195.safetensors", - "model.layers.8.residual_mlp.w1.weight": "model-00050-of-00195.safetensors", - "model.layers.8.residual_mlp.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.8.residual_mlp.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00045-of-00195.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00045-of-00195.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00045-of-00195.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00045-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.10.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.10.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.10.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.100.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.100.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.100.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.101.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.101.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.101.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.102.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.102.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.102.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.103.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.103.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.103.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.104.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.104.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.104.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.105.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.105.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.105.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.106.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.106.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.106.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.107.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.107.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.107.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.108.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.108.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.108.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.109.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.109.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.109.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.11.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.11.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.11.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.110.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.110.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.110.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.111.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.111.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.111.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.112.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.112.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.112.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.113.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.113.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.113.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.114.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.114.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.114.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.115.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.115.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.115.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.116.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.116.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.116.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.117.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.117.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.117.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.118.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.118.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.118.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.119.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.119.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.119.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.12.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.12.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.12.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.120.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.120.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.120.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.121.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.121.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.121.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.122.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.122.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.122.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.123.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.123.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.123.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.124.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.124.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.124.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.125.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.125.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.125.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.126.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.126.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.126.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.127.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.127.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.127.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.13.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.13.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.13.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.14.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.14.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.14.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.15.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.15.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.15.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.16.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.16.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.16.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.17.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.17.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.17.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.18.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.18.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.18.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.19.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.19.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.19.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.20.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.20.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.20.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.21.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.21.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.21.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.22.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.22.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.22.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.23.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.23.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.23.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.24.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.24.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.24.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.25.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.25.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.25.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.26.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.26.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.26.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.27.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.27.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.27.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.28.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.28.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.28.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.29.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.29.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.29.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.30.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.30.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.30.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.31.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.31.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.31.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.32.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.32.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.32.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.33.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.33.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.33.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.34.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.34.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.34.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.35.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.35.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.35.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.36.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.36.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.36.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.37.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.37.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.37.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.38.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.38.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.38.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.39.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.39.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.39.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.40.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.40.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.40.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.41.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.41.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.41.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.42.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.42.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.42.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.43.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.43.w2.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.43.w3.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.44.w1.weight": "model-00052-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.44.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.44.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.45.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.45.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.45.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.46.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.46.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.46.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.47.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.47.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.47.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.48.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.48.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.48.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.49.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.49.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.49.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.50.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.50.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.50.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.51.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.51.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.51.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.52.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.52.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.52.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.53.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.53.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.53.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.54.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.54.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.54.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.55.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.55.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.55.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.56.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.56.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.56.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.57.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.57.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.57.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.58.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.58.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.58.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.59.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.59.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.59.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.60.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.60.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.60.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.61.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.61.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.61.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.62.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.62.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.62.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.63.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.63.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.63.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.64.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.64.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.64.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.65.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.65.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.65.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.66.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.66.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.66.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.67.w1.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.67.w2.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.67.w3.weight": "model-00053-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.68.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.68.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.68.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.69.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.69.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.69.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.70.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.70.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.70.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.71.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.71.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.71.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.72.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.72.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.72.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.73.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.73.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.73.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.74.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.74.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.74.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.75.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.75.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.75.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.76.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.76.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.76.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.77.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.77.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.77.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.78.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.78.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.78.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.79.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.79.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.79.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.8.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.8.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.8.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.80.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.80.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.80.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.81.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.81.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.81.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.82.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.82.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.82.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.83.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.83.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.83.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.84.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.84.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.84.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.85.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.85.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.85.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.86.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.86.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.86.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.87.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.87.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.87.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.88.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.88.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.88.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.89.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.89.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.89.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.9.w1.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.9.w2.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.9.w3.weight": "model-00051-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.90.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.90.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.90.w3.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.91.w1.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.91.w2.weight": "model-00054-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.91.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.92.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.92.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.92.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.93.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.93.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.93.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.94.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.94.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.94.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.95.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.95.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.95.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.96.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.96.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.96.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.97.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.97.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.97.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.98.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.98.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.98.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.99.w1.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.99.w2.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.experts.99.w3.weight": "model-00055-of-00195.safetensors", - "model.layers.9.block_sparse_moe.gate.weight": "model-00051-of-00195.safetensors", - "model.layers.9.input_layernorm.weight": "model-00056-of-00195.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00056-of-00195.safetensors", - "model.layers.9.residual_layernorm.weight": "model-00056-of-00195.safetensors", - "model.layers.9.residual_mlp.w1.weight": "model-00056-of-00195.safetensors", - "model.layers.9.residual_mlp.w2.weight": "model-00056-of-00195.safetensors", - "model.layers.9.residual_mlp.w3.weight": "model-00056-of-00195.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00051-of-00195.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00051-of-00195.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00051-of-00195.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00051-of-00195.safetensors", - "model.norm.weight": "model-00195-of-00195.safetensors" + "lm_head.weight": "model-00194-of-00194.safetensors", + "model.embed_tokens.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.100.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.100.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.100.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.101.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.101.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.101.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.102.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.102.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.102.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.103.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.103.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.103.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.104.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.104.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.104.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.105.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.105.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.105.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.106.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.106.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.106.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.107.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.107.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.107.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.108.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.108.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.108.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.109.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.109.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.109.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.110.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.110.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.110.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.111.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.111.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.111.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.112.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.112.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.112.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.113.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.113.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.113.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.114.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.114.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.114.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.115.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.115.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.115.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.116.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.116.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.116.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.117.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.117.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.117.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.118.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.118.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.118.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.119.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.119.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.119.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.120.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.120.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.120.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.121.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.121.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.121.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.122.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.122.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.122.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.123.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.123.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.123.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.124.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.124.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.124.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.125.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.125.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.125.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.126.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.126.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.126.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.127.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.127.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.127.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w1.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w2.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w3.weight": "model-00002-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.64.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.64.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.64.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.65.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.65.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.65.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.66.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.66.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.66.w3.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.67.w1.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.67.w2.weight": "model-00003-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.67.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.68.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.68.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.68.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.69.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.69.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.69.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.70.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.70.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.70.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.71.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.71.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.71.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.72.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.72.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.72.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.73.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.73.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.73.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.74.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.74.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.74.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.75.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.75.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.75.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.76.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.76.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.76.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.77.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.77.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.77.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.78.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.78.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.78.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.79.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.79.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.79.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.80.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.80.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.80.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.81.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.81.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.81.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.82.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.82.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.82.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.83.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.83.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.83.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.84.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.84.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.84.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.85.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.85.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.85.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.86.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.86.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.86.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.87.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.87.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.87.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.88.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.88.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.88.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.89.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.89.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.89.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.90.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.90.w2.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.90.w3.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.91.w1.weight": "model-00004-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.91.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.91.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.92.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.92.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.92.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.93.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.93.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.93.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.94.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.94.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.94.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.95.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.95.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.95.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.96.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.96.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.96.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.97.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.97.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.97.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.98.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.98.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.98.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.99.w1.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.99.w2.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.experts.99.w3.weight": "model-00005-of-00194.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00194.safetensors", + "model.layers.0.input_layernorm.weight": "model-00006-of-00194.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00006-of-00194.safetensors", + "model.layers.0.residual_layernorm.weight": "model-00006-of-00194.safetensors", + "model.layers.0.residual_mlp.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.0.residual_mlp.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.0.residual_mlp.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00194.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00194.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00194.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.100.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.100.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.100.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.101.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.101.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.101.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.102.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.102.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.102.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.103.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.103.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.103.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.104.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.104.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.104.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.105.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.105.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.105.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.106.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.106.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.106.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.107.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.107.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.107.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.108.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.108.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.108.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.109.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.109.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.109.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.110.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.110.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.110.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.111.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.111.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.111.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.112.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.112.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.112.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.113.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.113.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.113.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.114.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.114.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.114.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.115.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.115.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.115.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.116.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.116.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.116.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.117.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.117.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.117.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.118.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.118.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.118.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.119.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.119.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.119.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.120.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.120.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.120.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.121.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.121.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.121.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.122.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.122.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.122.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.123.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.123.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.123.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.124.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.124.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.124.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.125.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.125.w2.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.125.w3.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.126.w1.weight": "model-00011-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.126.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.126.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.127.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.127.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.127.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w2.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w3.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w1.weight": "model-00008-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.64.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.64.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.64.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.65.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.65.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.65.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.66.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.66.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.66.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.67.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.67.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.67.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.68.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.68.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.68.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.69.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.69.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.69.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.70.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.70.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.70.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.71.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.71.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.71.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.72.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.72.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.72.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.73.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.73.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.73.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.74.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.74.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.74.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.75.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.75.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.75.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.76.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.76.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.76.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.77.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.77.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.77.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.78.w1.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.78.w2.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.78.w3.weight": "model-00009-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.79.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.79.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.79.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.80.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.80.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.80.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.81.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.81.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.81.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.82.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.82.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.82.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.83.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.83.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.83.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.84.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.84.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.84.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.85.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.85.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.85.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.86.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.86.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.86.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.87.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.87.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.87.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.88.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.88.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.88.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.89.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.89.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.89.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w1.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w2.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w3.weight": "model-00007-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.90.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.90.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.90.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.91.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.91.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.91.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.92.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.92.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.92.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.93.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.93.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.93.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.94.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.94.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.94.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.95.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.95.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.95.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.96.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.96.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.96.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.97.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.97.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.97.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.98.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.98.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.98.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.99.w1.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.99.w2.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.experts.99.w3.weight": "model-00010-of-00194.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00006-of-00194.safetensors", + "model.layers.1.input_layernorm.weight": "model-00012-of-00194.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00012-of-00194.safetensors", + "model.layers.1.residual_layernorm.weight": "model-00012-of-00194.safetensors", + "model.layers.1.residual_mlp.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.1.residual_mlp.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.1.residual_mlp.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00006-of-00194.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00006-of-00194.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00006-of-00194.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00006-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.100.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.100.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.100.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.101.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.101.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.101.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.102.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.102.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.102.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.103.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.103.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.103.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.104.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.104.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.104.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.105.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.105.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.105.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.106.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.106.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.106.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.107.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.107.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.107.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.108.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.108.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.108.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.109.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.109.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.109.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.110.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.110.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.110.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.111.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.111.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.111.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.112.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.112.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.112.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.113.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.113.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.113.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.114.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.114.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.114.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.115.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.115.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.115.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.116.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.116.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.116.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.117.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.117.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.117.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.118.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.118.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.118.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.119.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.119.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.119.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.120.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.120.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.120.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.121.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.121.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.121.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.122.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.122.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.122.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.123.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.123.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.123.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.124.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.124.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.124.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.125.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.125.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.125.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.126.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.126.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.126.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.127.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.127.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.127.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w1.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w2.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w3.weight": "model-00057-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w3.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w1.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w2.weight": "model-00058-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.64.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.64.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.64.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.65.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.65.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.65.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.66.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.66.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.66.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.67.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.67.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.67.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.68.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.68.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.68.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.69.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.69.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.69.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.70.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.70.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.70.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.71.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.71.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.71.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.72.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.72.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.72.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.73.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.73.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.73.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.74.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.74.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.74.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.75.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.75.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.75.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.76.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.76.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.76.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.77.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.77.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.77.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.78.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.78.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.78.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.79.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.79.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.79.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.80.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.80.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.80.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.81.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.81.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.81.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.82.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.82.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.82.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.83.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.83.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.83.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.84.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.84.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.84.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.85.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.85.w2.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.85.w3.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.86.w1.weight": "model-00059-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.86.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.86.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.87.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.87.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.87.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.88.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.88.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.88.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.89.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.89.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.89.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.90.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.90.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.90.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.91.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.91.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.91.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.92.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.92.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.92.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.93.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.93.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.93.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.94.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.94.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.94.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.95.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.95.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.95.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.96.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.96.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.96.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.97.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.97.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.97.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.98.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.98.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.98.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.99.w1.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.99.w2.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.experts.99.w3.weight": "model-00060-of-00194.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00056-of-00194.safetensors", + "model.layers.10.input_layernorm.weight": "model-00061-of-00194.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00061-of-00194.safetensors", + "model.layers.10.residual_layernorm.weight": "model-00061-of-00194.safetensors", + "model.layers.10.residual_mlp.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.10.residual_mlp.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.10.residual_mlp.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00056-of-00194.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00056-of-00194.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00056-of-00194.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00056-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.100.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.100.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.100.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.101.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.101.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.101.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.102.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.102.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.102.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.103.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.103.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.103.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.104.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.104.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.104.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.105.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.105.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.105.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.106.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.106.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.106.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.107.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.107.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.107.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.108.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.108.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.108.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.109.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.109.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.109.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.110.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.110.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.110.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.111.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.111.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.111.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.112.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.112.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.112.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.113.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.113.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.113.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.114.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.114.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.114.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.115.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.115.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.115.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.116.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.116.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.116.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.117.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.117.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.117.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.118.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.118.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.118.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.119.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.119.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.119.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.120.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.120.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.120.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.121.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.121.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.121.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.122.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.122.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.122.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.123.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.123.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.123.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.124.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.124.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.124.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.125.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.125.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.125.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.126.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.126.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.126.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.127.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.127.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.127.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00061-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00061-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00061-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w2.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w3.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w1.weight": "model-00063-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.64.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.64.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.64.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.65.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.65.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.65.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.66.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.66.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.66.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.67.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.67.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.67.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.68.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.68.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.68.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.69.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.69.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.69.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.70.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.70.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.70.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.71.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.71.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.71.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.72.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.72.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.72.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.73.w1.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.73.w2.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.73.w3.weight": "model-00064-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.74.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.74.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.74.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.75.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.75.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.75.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.76.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.76.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.76.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.77.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.77.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.77.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.78.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.78.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.78.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.79.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.79.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.79.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.80.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.80.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.80.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.81.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.81.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.81.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.82.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.82.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.82.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.83.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.83.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.83.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.84.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.84.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.84.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.85.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.85.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.85.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.86.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.86.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.86.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.87.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.87.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.87.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.88.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.88.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.88.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.89.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.89.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.89.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w1.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w2.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w3.weight": "model-00062-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.90.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.90.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.90.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.91.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.91.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.91.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.92.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.92.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.92.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.93.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.93.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.93.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.94.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.94.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.94.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.95.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.95.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.95.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.96.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.96.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.96.w3.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.97.w1.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.97.w2.weight": "model-00065-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.97.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.98.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.98.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.98.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.99.w1.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.99.w2.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.experts.99.w3.weight": "model-00066-of-00194.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00061-of-00194.safetensors", + "model.layers.11.input_layernorm.weight": "model-00067-of-00194.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00067-of-00194.safetensors", + "model.layers.11.residual_layernorm.weight": "model-00067-of-00194.safetensors", + "model.layers.11.residual_mlp.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.11.residual_mlp.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.11.residual_mlp.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00061-of-00194.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00061-of-00194.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00061-of-00194.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00061-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.100.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.100.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.100.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.101.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.101.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.101.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.102.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.102.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.102.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.103.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.103.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.103.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.104.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.104.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.104.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.105.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.105.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.105.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.106.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.106.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.106.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.107.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.107.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.107.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.108.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.108.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.108.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.109.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.109.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.109.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.110.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.110.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.110.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.111.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.111.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.111.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.112.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.112.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.112.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.113.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.113.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.113.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.114.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.114.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.114.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.115.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.115.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.115.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.116.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.116.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.116.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.117.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.117.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.117.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.118.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.118.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.118.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.119.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.119.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.119.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.120.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.120.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.120.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.121.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.121.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.121.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.122.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.122.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.122.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.123.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.123.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.123.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.124.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.124.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.124.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.125.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.125.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.125.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.126.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.126.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.126.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.127.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.127.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.127.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w1.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w2.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w3.weight": "model-00068-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w3.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w1.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w2.weight": "model-00069-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.64.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.64.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.64.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.65.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.65.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.65.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.66.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.66.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.66.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.67.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.67.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.67.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.68.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.68.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.68.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.69.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.69.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.69.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.70.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.70.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.70.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.71.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.71.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.71.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.72.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.72.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.72.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.73.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.73.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.73.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.74.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.74.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.74.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.75.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.75.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.75.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.76.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.76.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.76.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.77.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.77.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.77.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.78.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.78.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.78.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.79.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.79.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.79.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.80.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.80.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.80.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.81.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.81.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.81.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.82.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.82.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.82.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.83.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.83.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.83.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.84.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.84.w2.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.84.w3.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.85.w1.weight": "model-00070-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.85.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.85.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.86.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.86.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.86.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.87.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.87.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.87.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.88.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.88.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.88.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.89.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.89.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.89.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w1.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w2.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w3.weight": "model-00067-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.90.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.90.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.90.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.91.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.91.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.91.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.92.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.92.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.92.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.93.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.93.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.93.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.94.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.94.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.94.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.95.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.95.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.95.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.96.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.96.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.96.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.97.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.97.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.97.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.98.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.98.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.98.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.99.w1.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.99.w2.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.experts.99.w3.weight": "model-00071-of-00194.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00067-of-00194.safetensors", + "model.layers.12.input_layernorm.weight": "model-00072-of-00194.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00072-of-00194.safetensors", + "model.layers.12.residual_layernorm.weight": "model-00072-of-00194.safetensors", + "model.layers.12.residual_mlp.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.12.residual_mlp.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.12.residual_mlp.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00067-of-00194.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00067-of-00194.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00067-of-00194.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00067-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00072-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00072-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00072-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.100.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.100.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.100.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.101.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.101.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.101.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.102.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.102.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.102.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.103.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.103.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.103.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.104.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.104.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.104.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.105.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.105.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.105.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.106.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.106.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.106.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.107.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.107.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.107.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.108.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.108.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.108.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.109.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.109.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.109.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.110.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.110.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.110.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.111.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.111.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.111.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.112.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.112.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.112.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.113.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.113.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.113.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.114.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.114.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.114.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.115.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.115.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.115.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.116.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.116.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.116.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.117.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.117.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.117.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.118.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.118.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.118.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.119.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.119.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.119.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.120.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.120.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.120.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.121.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.121.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.121.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.122.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.122.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.122.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.123.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.123.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.123.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.124.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.124.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.124.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.125.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.125.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.125.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.126.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.126.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.126.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.127.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.127.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.127.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w2.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w3.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w1.weight": "model-00074-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.64.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.64.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.64.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.65.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.65.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.65.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.66.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.66.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.66.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.67.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.67.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.67.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.68.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.68.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.68.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.69.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.69.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.69.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.70.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.70.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.70.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.71.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.71.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.71.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.72.w1.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.72.w2.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.72.w3.weight": "model-00075-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.73.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.73.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.73.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.74.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.74.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.74.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.75.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.75.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.75.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.76.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.76.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.76.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.77.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.77.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.77.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.78.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.78.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.78.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.79.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.79.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.79.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.80.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.80.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.80.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.81.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.81.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.81.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.82.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.82.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.82.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.83.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.83.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.83.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.84.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.84.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.84.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.85.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.85.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.85.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.86.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.86.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.86.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.87.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.87.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.87.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.88.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.88.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.88.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.89.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.89.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.89.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w1.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w2.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w3.weight": "model-00073-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.90.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.90.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.90.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.91.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.91.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.91.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.92.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.92.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.92.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.93.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.93.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.93.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.94.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.94.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.94.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.95.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.95.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.95.w3.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.96.w1.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.96.w2.weight": "model-00076-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.96.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.97.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.97.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.97.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.98.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.98.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.98.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.99.w1.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.99.w2.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.experts.99.w3.weight": "model-00077-of-00194.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00072-of-00194.safetensors", + "model.layers.13.input_layernorm.weight": "model-00078-of-00194.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00078-of-00194.safetensors", + "model.layers.13.residual_layernorm.weight": "model-00078-of-00194.safetensors", + "model.layers.13.residual_mlp.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.13.residual_mlp.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.13.residual_mlp.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00072-of-00194.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00072-of-00194.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00072-of-00194.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00072-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.100.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.100.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.100.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.101.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.101.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.101.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.102.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.102.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.102.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.103.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.103.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.103.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.104.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.104.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.104.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.105.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.105.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.105.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.106.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.106.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.106.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.107.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.107.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.107.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.108.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.108.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.108.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.109.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.109.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.109.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.110.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.110.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.110.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.111.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.111.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.111.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.112.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.112.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.112.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.113.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.113.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.113.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.114.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.114.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.114.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.115.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.115.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.115.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.116.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.116.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.116.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.117.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.117.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.117.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.118.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.118.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.118.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.119.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.119.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.119.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.120.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.120.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.120.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.121.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.121.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.121.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.122.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.122.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.122.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.123.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.123.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.123.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.124.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.124.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.124.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.125.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.125.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.125.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.126.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.126.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.126.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.127.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.127.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.127.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w1.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w2.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w3.weight": "model-00079-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w3.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w1.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w2.weight": "model-00080-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.64.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.64.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.64.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.65.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.65.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.65.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.66.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.66.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.66.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.67.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.67.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.67.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.68.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.68.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.68.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.69.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.69.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.69.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.70.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.70.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.70.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.71.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.71.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.71.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.72.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.72.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.72.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.73.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.73.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.73.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.74.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.74.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.74.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.75.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.75.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.75.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.76.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.76.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.76.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.77.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.77.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.77.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.78.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.78.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.78.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.79.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.79.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.79.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.80.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.80.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.80.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.81.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.81.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.81.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.82.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.82.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.82.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.83.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.83.w2.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.83.w3.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.84.w1.weight": "model-00081-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.84.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.84.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.85.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.85.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.85.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.86.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.86.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.86.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.87.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.87.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.87.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.88.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.88.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.88.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.89.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.89.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.89.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w1.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w2.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w3.weight": "model-00078-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.90.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.90.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.90.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.91.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.91.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.91.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.92.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.92.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.92.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.93.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.93.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.93.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.94.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.94.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.94.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.95.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.95.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.95.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.96.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.96.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.96.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.97.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.97.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.97.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.98.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.98.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.98.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.99.w1.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.99.w2.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.experts.99.w3.weight": "model-00082-of-00194.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00078-of-00194.safetensors", + "model.layers.14.input_layernorm.weight": "model-00083-of-00194.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00083-of-00194.safetensors", + "model.layers.14.residual_layernorm.weight": "model-00083-of-00194.safetensors", + "model.layers.14.residual_mlp.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.14.residual_mlp.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.14.residual_mlp.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00078-of-00194.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00078-of-00194.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00078-of-00194.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00078-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00083-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00083-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00083-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.100.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.100.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.100.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.101.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.101.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.101.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.102.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.102.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.102.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.103.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.103.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.103.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.104.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.104.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.104.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.105.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.105.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.105.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.106.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.106.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.106.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.107.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.107.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.107.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.108.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.108.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.108.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.109.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.109.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.109.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.110.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.110.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.110.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.111.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.111.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.111.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.112.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.112.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.112.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.113.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.113.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.113.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.114.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.114.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.114.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.115.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.115.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.115.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.116.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.116.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.116.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.117.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.117.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.117.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.118.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.118.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.118.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.119.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.119.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.119.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.120.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.120.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.120.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.121.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.121.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.121.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.122.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.122.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.122.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.123.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.123.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.123.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.124.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.124.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.124.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.125.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.125.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.125.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.126.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.126.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.126.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.127.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.127.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.127.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w2.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w3.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w1.weight": "model-00085-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.64.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.64.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.64.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.65.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.65.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.65.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.66.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.66.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.66.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.67.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.67.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.67.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.68.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.68.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.68.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.69.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.69.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.69.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.70.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.70.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.70.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.71.w1.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.71.w2.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.71.w3.weight": "model-00086-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.72.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.72.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.72.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.73.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.73.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.73.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.74.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.74.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.74.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.75.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.75.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.75.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.76.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.76.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.76.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.77.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.77.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.77.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.78.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.78.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.78.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.79.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.79.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.79.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.80.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.80.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.80.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.81.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.81.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.81.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.82.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.82.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.82.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.83.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.83.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.83.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.84.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.84.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.84.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.85.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.85.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.85.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.86.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.86.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.86.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.87.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.87.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.87.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.88.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.88.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.88.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.89.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.89.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.89.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w1.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w2.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w3.weight": "model-00084-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.90.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.90.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.90.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.91.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.91.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.91.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.92.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.92.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.92.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.93.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.93.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.93.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.94.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.94.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.94.w3.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.95.w1.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.95.w2.weight": "model-00087-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.95.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.96.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.96.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.96.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.97.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.97.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.97.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.98.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.98.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.98.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.99.w1.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.99.w2.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.experts.99.w3.weight": "model-00088-of-00194.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00083-of-00194.safetensors", + "model.layers.15.input_layernorm.weight": "model-00089-of-00194.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00089-of-00194.safetensors", + "model.layers.15.residual_layernorm.weight": "model-00089-of-00194.safetensors", + "model.layers.15.residual_mlp.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.15.residual_mlp.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.15.residual_mlp.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00083-of-00194.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00083-of-00194.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00083-of-00194.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00083-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.10.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.10.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.10.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.100.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.100.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.100.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.101.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.101.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.101.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.102.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.102.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.102.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.103.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.103.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.103.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.104.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.104.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.104.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.105.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.105.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.105.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.106.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.106.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.106.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.107.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.107.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.107.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.108.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.108.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.108.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.109.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.109.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.109.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.11.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.11.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.11.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.110.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.110.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.110.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.111.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.111.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.111.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.112.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.112.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.112.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.113.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.113.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.113.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.114.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.114.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.114.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.115.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.115.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.115.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.116.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.116.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.116.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.117.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.117.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.117.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.118.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.118.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.118.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.119.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.119.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.119.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.12.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.12.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.12.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.120.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.120.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.120.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.121.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.121.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.121.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.122.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.122.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.122.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.123.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.123.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.123.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.124.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.124.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.124.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.125.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.125.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.125.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.126.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.126.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.126.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.127.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.127.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.127.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.13.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.13.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.13.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.14.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.14.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.14.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.15.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.15.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.15.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.16.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.16.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.16.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.17.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.17.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.17.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.18.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.18.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.18.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.19.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.19.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.19.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.20.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.20.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.20.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.21.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.21.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.21.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.22.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.22.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.22.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.23.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.23.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.23.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.24.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.24.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.24.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.25.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.25.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.25.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.26.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.26.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.26.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.27.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.27.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.27.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.28.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.28.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.28.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.29.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.29.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.29.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.30.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.30.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.30.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.31.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.31.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.31.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.32.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.32.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.32.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.33.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.33.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.33.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.34.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.34.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.34.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.35.w1.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.35.w2.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.35.w3.weight": "model-00090-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.36.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.36.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.36.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.37.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.37.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.37.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.38.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.38.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.38.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.39.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.39.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.39.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.40.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.40.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.40.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.41.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.41.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.41.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.42.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.42.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.42.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.43.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.43.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.43.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.44.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.44.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.44.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.45.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.45.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.45.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.46.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.46.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.46.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.47.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.47.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.47.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.48.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.48.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.48.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.49.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.49.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.49.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.50.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.50.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.50.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.51.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.51.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.51.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.52.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.52.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.52.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.53.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.53.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.53.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.54.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.54.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.54.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.55.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.55.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.55.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.56.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.56.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.56.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.57.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.57.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.57.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.58.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.58.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.58.w3.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.59.w1.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.59.w2.weight": "model-00091-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.59.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.60.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.60.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.60.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.61.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.61.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.61.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.62.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.62.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.62.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.63.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.63.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.63.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.64.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.64.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.64.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.65.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.65.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.65.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.66.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.66.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.66.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.67.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.67.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.67.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.68.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.68.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.68.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.69.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.69.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.69.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.70.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.70.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.70.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.71.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.71.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.71.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.72.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.72.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.72.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.73.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.73.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.73.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.74.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.74.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.74.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.75.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.75.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.75.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.76.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.76.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.76.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.77.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.77.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.77.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.78.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.78.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.78.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.79.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.79.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.79.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.8.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.8.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.8.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.80.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.80.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.80.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.81.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.81.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.81.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.82.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.82.w2.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.82.w3.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.83.w1.weight": "model-00092-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.83.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.83.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.84.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.84.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.84.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.85.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.85.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.85.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.86.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.86.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.86.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.87.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.87.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.87.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.88.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.88.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.88.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.89.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.89.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.89.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.9.w1.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.9.w2.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.9.w3.weight": "model-00089-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.90.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.90.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.90.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.91.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.91.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.91.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.92.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.92.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.92.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.93.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.93.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.93.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.94.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.94.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.94.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.95.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.95.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.95.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.96.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.96.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.96.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.97.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.97.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.97.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.98.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.98.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.98.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.99.w1.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.99.w2.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.experts.99.w3.weight": "model-00093-of-00194.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00089-of-00194.safetensors", + "model.layers.16.input_layernorm.weight": "model-00094-of-00194.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00094-of-00194.safetensors", + "model.layers.16.residual_layernorm.weight": "model-00094-of-00194.safetensors", + "model.layers.16.residual_mlp.w1.weight": "model-00094-of-00194.safetensors", + "model.layers.16.residual_mlp.w2.weight": "model-00094-of-00194.safetensors", + "model.layers.16.residual_mlp.w3.weight": "model-00094-of-00194.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00089-of-00194.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00089-of-00194.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00089-of-00194.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00089-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.10.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.10.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.10.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.100.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.100.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.100.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.101.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.101.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.101.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.102.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.102.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.102.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.103.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.103.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.103.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.104.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.104.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.104.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.105.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.105.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.105.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.106.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.106.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.106.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.107.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.107.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.107.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.108.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.108.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.108.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.109.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.109.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.109.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.11.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.11.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.11.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.110.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.110.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.110.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.111.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.111.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.111.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.112.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.112.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.112.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.113.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.113.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.113.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.114.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.114.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.114.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.115.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.115.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.115.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.116.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.116.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.116.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.117.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.117.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.117.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.118.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.118.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.118.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.119.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.119.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.119.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.12.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.12.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.12.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.120.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.120.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.120.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.121.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.121.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.121.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.122.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.122.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.122.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.123.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.123.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.123.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.124.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.124.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.124.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.125.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.125.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.125.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.126.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.126.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.126.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.127.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.127.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.127.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.13.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.13.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.13.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.14.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.14.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.14.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.15.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.15.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.15.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.16.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.16.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.16.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.17.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.17.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.17.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.18.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.18.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.18.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.19.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.19.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.19.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.20.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.20.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.20.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.21.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.21.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.21.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.22.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.22.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.22.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.23.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.23.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.23.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.24.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.24.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.24.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.25.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.25.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.25.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.26.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.26.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.26.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.27.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.27.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.27.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.28.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.28.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.28.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.29.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.29.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.29.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.30.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.30.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.30.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.31.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.31.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.31.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.32.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.32.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.32.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.33.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.33.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.33.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.34.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.34.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.34.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.35.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.35.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.35.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.36.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.36.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.36.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.37.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.37.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.37.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.38.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.38.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.38.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.39.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.39.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.39.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.40.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.40.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.40.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.41.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.41.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.41.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.42.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.42.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.42.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.43.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.43.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.43.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.44.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.44.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.44.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.45.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.45.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.45.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.46.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.46.w2.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.46.w3.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.47.w1.weight": "model-00096-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.47.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.47.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.48.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.48.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.48.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.49.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.49.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.49.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.50.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.50.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.50.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.51.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.51.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.51.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.52.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.52.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.52.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.53.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.53.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.53.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.54.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.54.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.54.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.55.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.55.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.55.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.56.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.56.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.56.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.57.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.57.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.57.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.58.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.58.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.58.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.59.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.59.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.59.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.60.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.60.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.60.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.61.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.61.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.61.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.62.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.62.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.62.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.63.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.63.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.63.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.64.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.64.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.64.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.65.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.65.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.65.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.66.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.66.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.66.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.67.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.67.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.67.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.68.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.68.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.68.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.69.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.69.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.69.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.70.w1.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.70.w2.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.70.w3.weight": "model-00097-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.71.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.71.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.71.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.72.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.72.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.72.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.73.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.73.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.73.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.74.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.74.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.74.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.75.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.75.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.75.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.76.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.76.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.76.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.77.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.77.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.77.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.78.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.78.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.78.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.79.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.79.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.79.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.8.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.8.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.8.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.80.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.80.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.80.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.81.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.81.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.81.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.82.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.82.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.82.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.83.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.83.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.83.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.84.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.84.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.84.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.85.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.85.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.85.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.86.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.86.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.86.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.87.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.87.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.87.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.88.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.88.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.88.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.89.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.89.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.89.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.9.w1.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.9.w2.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.9.w3.weight": "model-00095-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.90.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.90.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.90.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.91.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.91.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.91.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.92.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.92.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.92.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.93.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.93.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.93.w3.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.94.w1.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.94.w2.weight": "model-00098-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.94.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.95.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.95.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.95.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.96.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.96.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.96.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.97.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.97.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.97.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.98.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.98.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.98.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.99.w1.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.99.w2.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.experts.99.w3.weight": "model-00099-of-00194.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00094-of-00194.safetensors", + "model.layers.17.input_layernorm.weight": "model-00100-of-00194.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00100-of-00194.safetensors", + "model.layers.17.residual_layernorm.weight": "model-00100-of-00194.safetensors", + "model.layers.17.residual_mlp.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.17.residual_mlp.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.17.residual_mlp.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00094-of-00194.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00094-of-00194.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00094-of-00194.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00094-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.10.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.10.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.10.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.100.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.100.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.100.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.101.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.101.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.101.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.102.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.102.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.102.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.103.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.103.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.103.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.104.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.104.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.104.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.105.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.105.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.105.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.106.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.106.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.106.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.107.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.107.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.107.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.108.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.108.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.108.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.109.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.109.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.109.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.11.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.11.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.11.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.110.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.110.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.110.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.111.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.111.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.111.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.112.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.112.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.112.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.113.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.113.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.113.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.114.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.114.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.114.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.115.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.115.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.115.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.116.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.116.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.116.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.117.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.117.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.117.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.118.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.118.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.118.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.119.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.119.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.119.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.12.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.12.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.12.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.120.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.120.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.120.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.121.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.121.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.121.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.122.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.122.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.122.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.123.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.123.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.123.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.124.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.124.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.124.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.125.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.125.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.125.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.126.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.126.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.126.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.127.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.127.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.127.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.13.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.13.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.13.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.14.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.14.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.14.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.15.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.15.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.15.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.16.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.16.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.16.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.17.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.17.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.17.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.18.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.18.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.18.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.19.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.19.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.19.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.20.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.20.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.20.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.21.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.21.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.21.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.22.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.22.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.22.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.23.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.23.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.23.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.24.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.24.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.24.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.25.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.25.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.25.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.26.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.26.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.26.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.27.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.27.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.27.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.28.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.28.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.28.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.29.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.29.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.29.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.30.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.30.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.30.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.31.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.31.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.31.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.32.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.32.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.32.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.33.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.33.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.33.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.34.w1.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.34.w2.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.34.w3.weight": "model-00101-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.35.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.35.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.35.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.36.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.36.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.36.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.37.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.37.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.37.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.38.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.38.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.38.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.39.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.39.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.39.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.40.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.40.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.40.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.41.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.41.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.41.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.42.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.42.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.42.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.43.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.43.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.43.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.44.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.44.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.44.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.45.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.45.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.45.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.46.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.46.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.46.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.47.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.47.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.47.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.48.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.48.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.48.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.49.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.49.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.49.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.50.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.50.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.50.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.51.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.51.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.51.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.52.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.52.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.52.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.53.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.53.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.53.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.54.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.54.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.54.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.55.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.55.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.55.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.56.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.56.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.56.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.57.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.57.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.57.w3.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.58.w1.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.58.w2.weight": "model-00102-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.58.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.59.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.59.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.59.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.60.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.60.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.60.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.61.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.61.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.61.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.62.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.62.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.62.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.63.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.63.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.63.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.64.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.64.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.64.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.65.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.65.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.65.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.66.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.66.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.66.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.67.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.67.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.67.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.68.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.68.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.68.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.69.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.69.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.69.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.70.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.70.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.70.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.71.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.71.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.71.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.72.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.72.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.72.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.73.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.73.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.73.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.74.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.74.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.74.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.75.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.75.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.75.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.76.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.76.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.76.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.77.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.77.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.77.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.78.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.78.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.78.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.79.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.79.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.79.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.8.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.8.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.8.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.80.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.80.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.80.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.81.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.81.w2.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.81.w3.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.82.w1.weight": "model-00103-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.82.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.82.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.83.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.83.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.83.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.84.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.84.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.84.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.85.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.85.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.85.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.86.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.86.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.86.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.87.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.87.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.87.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.88.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.88.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.88.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.89.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.89.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.89.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.9.w1.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.9.w2.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.9.w3.weight": "model-00100-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.90.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.90.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.90.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.91.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.91.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.91.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.92.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.92.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.92.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.93.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.93.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.93.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.94.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.94.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.94.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.95.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.95.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.95.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.96.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.96.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.96.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.97.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.97.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.97.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.98.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.98.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.98.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.99.w1.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.99.w2.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.experts.99.w3.weight": "model-00104-of-00194.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00100-of-00194.safetensors", + "model.layers.18.input_layernorm.weight": "model-00105-of-00194.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00105-of-00194.safetensors", + "model.layers.18.residual_layernorm.weight": "model-00105-of-00194.safetensors", + "model.layers.18.residual_mlp.w1.weight": "model-00105-of-00194.safetensors", + "model.layers.18.residual_mlp.w2.weight": "model-00105-of-00194.safetensors", + "model.layers.18.residual_mlp.w3.weight": "model-00105-of-00194.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00100-of-00194.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00100-of-00194.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00100-of-00194.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00100-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.10.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.10.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.10.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.100.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.100.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.100.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.101.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.101.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.101.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.102.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.102.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.102.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.103.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.103.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.103.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.104.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.104.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.104.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.105.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.105.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.105.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.106.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.106.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.106.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.107.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.107.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.107.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.108.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.108.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.108.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.109.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.109.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.109.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.11.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.11.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.11.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.110.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.110.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.110.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.111.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.111.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.111.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.112.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.112.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.112.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.113.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.113.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.113.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.114.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.114.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.114.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.115.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.115.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.115.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.116.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.116.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.116.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.117.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.117.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.117.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.118.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.118.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.118.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.119.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.119.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.119.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.12.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.12.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.12.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.120.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.120.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.120.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.121.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.121.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.121.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.122.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.122.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.122.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.123.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.123.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.123.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.124.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.124.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.124.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.125.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.125.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.125.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.126.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.126.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.126.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.127.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.127.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.127.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.13.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.13.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.13.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.14.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.14.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.14.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.15.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.15.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.15.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.16.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.16.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.16.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.17.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.17.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.17.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.18.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.18.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.18.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.19.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.19.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.19.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.20.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.20.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.20.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.21.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.21.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.21.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.22.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.22.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.22.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.23.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.23.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.23.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.24.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.24.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.24.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.25.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.25.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.25.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.26.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.26.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.26.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.27.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.27.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.27.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.28.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.28.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.28.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.29.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.29.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.29.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.30.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.30.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.30.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.31.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.31.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.31.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.32.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.32.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.32.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.33.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.33.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.33.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.34.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.34.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.34.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.35.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.35.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.35.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.36.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.36.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.36.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.37.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.37.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.37.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.38.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.38.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.38.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.39.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.39.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.39.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.40.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.40.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.40.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.41.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.41.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.41.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.42.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.42.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.42.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.43.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.43.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.43.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.44.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.44.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.44.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.45.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.45.w2.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.45.w3.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.46.w1.weight": "model-00107-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.46.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.46.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.47.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.47.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.47.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.48.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.48.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.48.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.49.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.49.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.49.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.50.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.50.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.50.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.51.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.51.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.51.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.52.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.52.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.52.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.53.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.53.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.53.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.54.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.54.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.54.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.55.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.55.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.55.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.56.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.56.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.56.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.57.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.57.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.57.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.58.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.58.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.58.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.59.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.59.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.59.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.60.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.60.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.60.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.61.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.61.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.61.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.62.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.62.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.62.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.63.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.63.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.63.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.64.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.64.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.64.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.65.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.65.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.65.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.66.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.66.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.66.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.67.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.67.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.67.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.68.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.68.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.68.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.69.w1.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.69.w2.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.69.w3.weight": "model-00108-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.70.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.70.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.70.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.71.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.71.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.71.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.72.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.72.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.72.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.73.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.73.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.73.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.74.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.74.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.74.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.75.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.75.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.75.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.76.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.76.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.76.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.77.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.77.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.77.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.78.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.78.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.78.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.79.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.79.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.79.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.8.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.8.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.8.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.80.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.80.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.80.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.81.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.81.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.81.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.82.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.82.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.82.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.83.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.83.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.83.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.84.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.84.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.84.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.85.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.85.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.85.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.86.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.86.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.86.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.87.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.87.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.87.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.88.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.88.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.88.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.89.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.89.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.89.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.9.w1.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.9.w2.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.9.w3.weight": "model-00106-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.90.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.90.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.90.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.91.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.91.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.91.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.92.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.92.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.92.w3.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.93.w1.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.93.w2.weight": "model-00109-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.93.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.94.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.94.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.94.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.95.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.95.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.95.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.96.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.96.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.96.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.97.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.97.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.97.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.98.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.98.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.98.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.99.w1.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.99.w2.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.experts.99.w3.weight": "model-00110-of-00194.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00106-of-00194.safetensors", + "model.layers.19.input_layernorm.weight": "model-00111-of-00194.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00111-of-00194.safetensors", + "model.layers.19.residual_layernorm.weight": "model-00111-of-00194.safetensors", + "model.layers.19.residual_mlp.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.19.residual_mlp.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.19.residual_mlp.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00106-of-00194.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00106-of-00194.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00106-of-00194.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00106-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.100.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.100.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.100.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.101.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.101.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.101.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.102.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.102.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.102.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.103.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.103.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.103.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.104.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.104.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.104.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.105.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.105.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.105.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.106.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.106.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.106.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.107.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.107.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.107.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.108.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.108.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.108.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.109.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.109.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.109.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.110.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.110.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.110.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.111.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.111.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.111.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.112.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.112.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.112.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.113.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.113.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.113.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.114.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.114.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.114.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.115.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.115.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.115.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.116.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.116.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.116.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.117.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.117.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.117.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.118.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.118.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.118.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.119.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.119.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.119.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.120.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.120.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.120.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.121.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.121.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.121.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.122.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.122.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.122.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.123.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.123.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.123.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.124.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.124.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.124.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.125.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.125.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.125.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.126.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.126.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.126.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.127.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.127.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.127.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w1.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w2.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w3.weight": "model-00013-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.64.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.64.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.64.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.65.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.65.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.65.w3.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.66.w1.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.66.w2.weight": "model-00014-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.66.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.67.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.67.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.67.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.68.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.68.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.68.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.69.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.69.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.69.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.70.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.70.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.70.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.71.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.71.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.71.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.72.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.72.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.72.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.73.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.73.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.73.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.74.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.74.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.74.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.75.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.75.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.75.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.76.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.76.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.76.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.77.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.77.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.77.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.78.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.78.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.78.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.79.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.79.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.79.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.80.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.80.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.80.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.81.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.81.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.81.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.82.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.82.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.82.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.83.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.83.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.83.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.84.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.84.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.84.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.85.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.85.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.85.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.86.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.86.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.86.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.87.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.87.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.87.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.88.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.88.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.88.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.89.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.89.w2.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.89.w3.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w1.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w2.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w3.weight": "model-00012-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.90.w1.weight": "model-00015-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.90.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.90.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.91.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.91.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.91.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.92.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.92.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.92.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.93.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.93.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.93.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.94.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.94.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.94.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.95.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.95.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.95.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.96.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.96.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.96.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.97.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.97.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.97.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.98.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.98.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.98.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.99.w1.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.99.w2.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.experts.99.w3.weight": "model-00016-of-00194.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00012-of-00194.safetensors", + "model.layers.2.input_layernorm.weight": "model-00017-of-00194.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00017-of-00194.safetensors", + "model.layers.2.residual_layernorm.weight": "model-00017-of-00194.safetensors", + "model.layers.2.residual_mlp.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.2.residual_mlp.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.2.residual_mlp.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00012-of-00194.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00012-of-00194.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00012-of-00194.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00012-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.10.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.10.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.10.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.100.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.100.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.100.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.101.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.101.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.101.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.102.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.102.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.102.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.103.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.103.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.103.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.104.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.104.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.104.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.105.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.105.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.105.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.106.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.106.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.106.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.107.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.107.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.107.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.108.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.108.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.108.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.109.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.109.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.109.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.11.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.11.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.11.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.110.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.110.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.110.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.111.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.111.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.111.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.112.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.112.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.112.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.113.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.113.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.113.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.114.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.114.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.114.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.115.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.115.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.115.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.116.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.116.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.116.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.117.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.117.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.117.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.118.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.118.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.118.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.119.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.119.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.119.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.12.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.12.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.12.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.120.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.120.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.120.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.121.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.121.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.121.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.122.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.122.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.122.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.123.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.123.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.123.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.124.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.124.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.124.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.125.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.125.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.125.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.126.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.126.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.126.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.127.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.127.w2.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.127.w3.weight": "model-00116-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.13.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.13.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.13.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.14.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.14.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.14.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.15.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.15.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.15.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.16.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.16.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.16.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.17.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.17.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.17.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.18.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.18.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.18.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.19.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.19.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.19.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.20.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.20.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.20.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.21.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.21.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.21.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.22.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.22.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.22.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.23.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.23.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.23.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.24.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.24.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.24.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.25.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.25.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.25.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.26.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.26.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.26.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.27.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.27.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.27.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.28.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.28.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.28.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.29.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.29.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.29.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.30.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.30.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.30.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.31.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.31.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.31.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.32.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.32.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.32.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.33.w1.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.33.w2.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.33.w3.weight": "model-00112-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.34.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.34.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.34.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.35.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.35.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.35.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.36.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.36.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.36.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.37.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.37.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.37.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.38.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.38.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.38.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.39.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.39.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.39.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.40.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.40.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.40.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.41.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.41.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.41.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.42.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.42.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.42.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.43.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.43.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.43.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.44.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.44.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.44.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.45.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.45.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.45.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.46.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.46.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.46.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.47.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.47.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.47.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.48.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.48.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.48.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.49.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.49.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.49.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.50.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.50.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.50.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.51.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.51.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.51.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.52.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.52.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.52.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.53.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.53.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.53.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.54.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.54.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.54.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.55.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.55.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.55.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.56.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.56.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.56.w3.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.57.w1.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.57.w2.weight": "model-00113-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.57.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.58.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.58.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.58.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.59.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.59.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.59.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.60.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.60.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.60.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.61.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.61.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.61.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.62.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.62.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.62.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.63.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.63.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.63.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.64.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.64.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.64.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.65.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.65.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.65.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.66.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.66.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.66.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.67.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.67.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.67.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.68.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.68.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.68.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.69.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.69.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.69.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.70.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.70.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.70.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.71.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.71.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.71.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.72.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.72.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.72.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.73.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.73.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.73.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.74.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.74.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.74.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.75.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.75.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.75.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.76.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.76.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.76.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.77.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.77.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.77.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.78.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.78.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.78.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.79.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.79.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.79.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.8.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.8.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.8.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.80.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.80.w2.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.80.w3.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.81.w1.weight": "model-00114-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.81.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.81.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.82.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.82.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.82.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.83.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.83.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.83.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.84.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.84.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.84.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.85.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.85.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.85.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.86.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.86.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.86.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.87.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.87.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.87.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.88.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.88.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.88.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.89.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.89.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.89.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.9.w1.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.9.w2.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.9.w3.weight": "model-00111-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.90.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.90.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.90.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.91.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.91.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.91.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.92.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.92.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.92.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.93.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.93.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.93.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.94.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.94.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.94.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.95.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.95.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.95.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.96.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.96.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.96.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.97.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.97.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.97.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.98.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.98.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.98.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.99.w1.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.99.w2.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.experts.99.w3.weight": "model-00115-of-00194.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00111-of-00194.safetensors", + "model.layers.20.input_layernorm.weight": "model-00116-of-00194.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00116-of-00194.safetensors", + "model.layers.20.residual_layernorm.weight": "model-00116-of-00194.safetensors", + "model.layers.20.residual_mlp.w1.weight": "model-00116-of-00194.safetensors", + "model.layers.20.residual_mlp.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.20.residual_mlp.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00111-of-00194.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00111-of-00194.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00111-of-00194.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00111-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.10.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.10.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.10.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.100.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.100.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.100.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.101.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.101.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.101.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.102.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.102.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.102.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.103.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.103.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.103.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.104.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.104.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.104.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.105.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.105.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.105.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.106.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.106.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.106.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.107.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.107.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.107.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.108.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.108.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.108.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.109.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.109.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.109.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.11.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.11.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.11.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.110.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.110.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.110.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.111.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.111.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.111.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.112.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.112.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.112.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.113.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.113.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.113.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.114.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.114.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.114.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.115.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.115.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.115.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.116.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.116.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.116.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.117.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.117.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.117.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.118.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.118.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.118.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.119.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.119.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.119.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.12.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.12.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.12.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.120.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.120.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.120.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.121.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.121.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.121.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.122.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.122.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.122.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.123.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.123.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.123.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.124.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.124.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.124.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.125.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.125.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.125.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.126.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.126.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.126.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.127.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.127.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.127.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.13.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.13.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.13.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.14.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.14.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.14.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.15.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.15.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.15.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.16.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.16.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.16.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.17.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.17.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.17.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.18.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.18.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.18.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.19.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.19.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.19.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.20.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.20.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.20.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.21.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.21.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.21.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.22.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.22.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.22.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.23.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.23.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.23.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.24.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.24.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.24.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.25.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.25.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.25.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.26.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.26.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.26.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.27.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.27.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.27.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.28.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.28.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.28.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.29.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.29.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.29.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.30.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.30.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.30.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.31.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.31.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.31.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.32.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.32.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.32.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.33.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.33.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.33.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.34.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.34.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.34.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.35.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.35.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.35.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.36.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.36.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.36.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.37.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.37.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.37.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.38.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.38.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.38.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.39.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.39.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.39.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.40.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.40.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.40.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.41.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.41.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.41.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.42.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.42.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.42.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.43.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.43.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.43.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.44.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.44.w2.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.44.w3.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.45.w1.weight": "model-00118-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.45.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.45.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.46.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.46.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.46.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.47.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.47.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.47.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.48.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.48.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.48.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.49.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.49.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.49.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.50.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.50.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.50.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.51.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.51.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.51.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.52.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.52.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.52.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.53.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.53.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.53.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.54.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.54.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.54.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.55.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.55.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.55.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.56.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.56.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.56.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.57.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.57.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.57.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.58.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.58.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.58.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.59.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.59.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.59.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.60.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.60.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.60.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.61.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.61.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.61.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.62.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.62.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.62.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.63.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.63.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.63.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.64.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.64.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.64.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.65.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.65.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.65.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.66.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.66.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.66.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.67.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.67.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.67.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.68.w1.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.68.w2.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.68.w3.weight": "model-00119-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.69.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.69.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.69.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.70.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.70.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.70.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.71.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.71.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.71.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.72.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.72.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.72.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.73.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.73.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.73.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.74.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.74.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.74.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.75.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.75.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.75.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.76.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.76.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.76.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.77.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.77.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.77.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.78.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.78.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.78.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.79.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.79.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.79.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.8.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.8.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.8.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.80.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.80.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.80.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.81.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.81.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.81.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.82.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.82.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.82.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.83.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.83.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.83.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.84.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.84.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.84.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.85.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.85.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.85.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.86.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.86.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.86.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.87.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.87.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.87.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.88.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.88.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.88.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.89.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.89.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.89.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.9.w1.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.9.w2.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.9.w3.weight": "model-00117-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.90.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.90.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.90.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.91.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.91.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.91.w3.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.92.w1.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.92.w2.weight": "model-00120-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.92.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.93.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.93.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.93.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.94.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.94.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.94.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.95.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.95.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.95.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.96.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.96.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.96.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.97.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.97.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.97.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.98.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.98.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.98.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.99.w1.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.99.w2.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.experts.99.w3.weight": "model-00121-of-00194.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00117-of-00194.safetensors", + "model.layers.21.input_layernorm.weight": "model-00122-of-00194.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00122-of-00194.safetensors", + "model.layers.21.residual_layernorm.weight": "model-00122-of-00194.safetensors", + "model.layers.21.residual_mlp.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.21.residual_mlp.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.21.residual_mlp.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00117-of-00194.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00117-of-00194.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00117-of-00194.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00117-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.10.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.10.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.10.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.100.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.100.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.100.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.101.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.101.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.101.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.102.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.102.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.102.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.103.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.103.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.103.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.104.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.104.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.104.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.105.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.105.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.105.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.106.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.106.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.106.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.107.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.107.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.107.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.108.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.108.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.108.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.109.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.109.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.109.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.11.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.11.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.11.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.110.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.110.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.110.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.111.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.111.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.111.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.112.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.112.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.112.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.113.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.113.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.113.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.114.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.114.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.114.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.115.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.115.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.115.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.116.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.116.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.116.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.117.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.117.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.117.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.118.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.118.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.118.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.119.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.119.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.119.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.12.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.12.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.12.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.120.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.120.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.120.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.121.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.121.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.121.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.122.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.122.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.122.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.123.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.123.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.123.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.124.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.124.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.124.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.125.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.125.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.125.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.126.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.126.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.126.w3.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.127.w1.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.127.w2.weight": "model-00127-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.127.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.13.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.13.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.13.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.14.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.14.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.14.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.15.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.15.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.15.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.16.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.16.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.16.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.17.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.17.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.17.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.18.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.18.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.18.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.19.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.19.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.19.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.20.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.20.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.20.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.21.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.21.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.21.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.22.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.22.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.22.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.23.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.23.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.23.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.24.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.24.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.24.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.25.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.25.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.25.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.26.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.26.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.26.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.27.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.27.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.27.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.28.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.28.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.28.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.29.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.29.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.29.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.30.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.30.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.30.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.31.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.31.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.31.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.32.w1.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.32.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.32.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.33.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.33.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.33.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.34.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.34.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.34.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.35.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.35.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.35.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.36.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.36.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.36.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.37.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.37.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.37.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.38.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.38.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.38.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.39.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.39.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.39.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.40.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.40.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.40.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.41.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.41.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.41.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.42.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.42.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.42.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.43.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.43.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.43.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.44.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.44.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.44.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.45.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.45.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.45.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.46.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.46.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.46.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.47.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.47.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.47.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.48.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.48.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.48.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.49.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.49.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.49.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.50.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.50.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.50.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.51.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.51.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.51.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.52.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.52.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.52.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.53.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.53.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.53.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.54.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.54.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.54.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.55.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.55.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.55.w3.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.56.w1.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.56.w2.weight": "model-00124-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.56.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.57.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.57.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.57.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.58.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.58.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.58.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.59.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.59.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.59.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.60.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.60.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.60.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.61.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.61.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.61.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.62.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.62.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.62.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.63.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.63.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.63.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.64.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.64.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.64.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.65.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.65.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.65.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.66.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.66.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.66.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.67.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.67.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.67.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.68.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.68.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.68.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.69.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.69.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.69.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.70.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.70.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.70.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.71.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.71.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.71.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.72.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.72.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.72.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.73.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.73.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.73.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.74.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.74.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.74.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.75.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.75.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.75.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.76.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.76.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.76.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.77.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.77.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.77.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.78.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.78.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.78.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.79.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.79.w2.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.79.w3.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.8.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.8.w2.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.8.w3.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.80.w1.weight": "model-00125-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.80.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.80.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.81.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.81.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.81.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.82.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.82.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.82.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.83.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.83.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.83.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.84.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.84.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.84.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.85.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.85.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.85.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.86.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.86.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.86.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.87.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.87.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.87.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.88.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.88.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.88.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.89.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.89.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.89.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.9.w1.weight": "model-00122-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.9.w2.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.9.w3.weight": "model-00123-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.90.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.90.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.90.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.91.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.91.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.91.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.92.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.92.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.92.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.93.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.93.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.93.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.94.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.94.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.94.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.95.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.95.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.95.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.96.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.96.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.96.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.97.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.97.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.97.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.98.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.98.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.98.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.99.w1.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.99.w2.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.experts.99.w3.weight": "model-00126-of-00194.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00122-of-00194.safetensors", + "model.layers.22.input_layernorm.weight": "model-00128-of-00194.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00128-of-00194.safetensors", + "model.layers.22.residual_layernorm.weight": "model-00128-of-00194.safetensors", + "model.layers.22.residual_mlp.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.22.residual_mlp.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.22.residual_mlp.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00122-of-00194.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00122-of-00194.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00122-of-00194.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00122-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.10.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.10.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.10.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.100.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.100.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.100.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.101.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.101.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.101.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.102.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.102.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.102.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.103.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.103.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.103.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.104.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.104.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.104.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.105.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.105.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.105.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.106.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.106.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.106.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.107.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.107.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.107.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.108.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.108.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.108.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.109.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.109.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.109.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.11.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.11.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.11.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.110.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.110.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.110.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.111.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.111.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.111.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.112.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.112.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.112.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.113.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.113.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.113.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.114.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.114.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.114.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.115.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.115.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.115.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.116.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.116.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.116.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.117.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.117.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.117.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.118.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.118.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.118.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.119.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.119.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.119.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.12.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.12.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.12.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.120.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.120.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.120.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.121.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.121.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.121.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.122.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.122.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.122.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.123.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.123.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.123.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.124.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.124.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.124.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.125.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.125.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.125.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.126.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.126.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.126.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.127.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.127.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.127.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.13.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.13.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.13.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.14.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.14.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.14.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.15.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.15.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.15.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.16.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.16.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.16.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.17.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.17.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.17.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.18.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.18.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.18.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.19.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.19.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.19.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.20.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.20.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.20.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.21.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.21.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.21.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.22.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.22.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.22.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.23.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.23.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.23.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.24.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.24.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.24.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.25.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.25.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.25.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.26.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.26.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.26.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.27.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.27.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.27.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.28.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.28.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.28.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.29.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.29.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.29.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.30.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.30.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.30.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.31.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.31.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.31.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.32.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.32.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.32.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.33.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.33.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.33.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.34.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.34.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.34.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.35.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.35.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.35.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.36.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.36.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.36.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.37.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.37.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.37.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.38.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.38.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.38.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.39.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.39.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.39.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.40.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.40.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.40.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.41.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.41.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.41.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.42.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.42.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.42.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.43.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.43.w2.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.43.w3.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.44.w1.weight": "model-00129-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.44.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.44.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.45.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.45.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.45.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.46.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.46.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.46.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.47.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.47.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.47.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.48.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.48.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.48.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.49.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.49.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.49.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.50.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.50.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.50.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.51.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.51.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.51.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.52.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.52.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.52.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.53.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.53.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.53.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.54.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.54.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.54.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.55.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.55.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.55.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.56.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.56.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.56.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.57.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.57.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.57.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.58.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.58.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.58.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.59.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.59.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.59.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.60.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.60.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.60.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.61.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.61.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.61.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.62.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.62.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.62.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.63.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.63.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.63.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.64.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.64.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.64.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.65.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.65.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.65.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.66.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.66.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.66.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.67.w1.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.67.w2.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.67.w3.weight": "model-00130-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.68.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.68.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.68.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.69.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.69.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.69.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.70.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.70.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.70.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.71.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.71.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.71.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.72.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.72.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.72.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.73.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.73.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.73.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.74.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.74.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.74.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.75.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.75.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.75.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.76.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.76.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.76.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.77.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.77.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.77.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.78.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.78.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.78.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.79.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.79.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.79.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.8.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.8.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.8.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.80.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.80.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.80.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.81.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.81.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.81.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.82.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.82.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.82.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.83.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.83.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.83.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.84.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.84.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.84.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.85.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.85.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.85.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.86.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.86.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.86.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.87.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.87.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.87.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.88.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.88.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.88.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.89.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.89.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.89.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.9.w1.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.9.w2.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.9.w3.weight": "model-00128-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.90.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.90.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.90.w3.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.91.w1.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.91.w2.weight": "model-00131-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.91.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.92.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.92.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.92.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.93.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.93.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.93.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.94.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.94.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.94.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.95.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.95.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.95.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.96.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.96.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.96.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.97.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.97.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.97.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.98.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.98.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.98.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.99.w1.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.99.w2.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.experts.99.w3.weight": "model-00132-of-00194.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00128-of-00194.safetensors", + "model.layers.23.input_layernorm.weight": "model-00133-of-00194.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00133-of-00194.safetensors", + "model.layers.23.residual_layernorm.weight": "model-00133-of-00194.safetensors", + "model.layers.23.residual_mlp.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.23.residual_mlp.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.23.residual_mlp.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00128-of-00194.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00128-of-00194.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00128-of-00194.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00128-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.10.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.10.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.10.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.100.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.100.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.100.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.101.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.101.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.101.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.102.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.102.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.102.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.103.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.103.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.103.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.104.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.104.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.104.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.105.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.105.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.105.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.106.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.106.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.106.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.107.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.107.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.107.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.108.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.108.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.108.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.109.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.109.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.109.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.11.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.11.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.11.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.110.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.110.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.110.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.111.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.111.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.111.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.112.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.112.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.112.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.113.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.113.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.113.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.114.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.114.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.114.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.115.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.115.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.115.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.116.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.116.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.116.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.117.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.117.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.117.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.118.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.118.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.118.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.119.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.119.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.119.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.12.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.12.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.12.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.120.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.120.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.120.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.121.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.121.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.121.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.122.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.122.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.122.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.123.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.123.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.123.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.124.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.124.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.124.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.125.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.125.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.125.w3.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.126.w1.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.126.w2.weight": "model-00138-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.126.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.127.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.127.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.127.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.13.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.13.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.13.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.14.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.14.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.14.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.15.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.15.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.15.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.16.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.16.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.16.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.17.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.17.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.17.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.18.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.18.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.18.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.19.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.19.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.19.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.20.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.20.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.20.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.21.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.21.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.21.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.22.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.22.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.22.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.23.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.23.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.23.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.24.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.24.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.24.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.25.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.25.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.25.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.26.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.26.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.26.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.27.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.27.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.27.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.28.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.28.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.28.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.29.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.29.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.29.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.30.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.30.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.30.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.31.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.31.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.31.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.32.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.32.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.32.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.33.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.33.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.33.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.34.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.34.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.34.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.35.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.35.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.35.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.36.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.36.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.36.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.37.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.37.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.37.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.38.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.38.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.38.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.39.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.39.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.39.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.40.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.40.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.40.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.41.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.41.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.41.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.42.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.42.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.42.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.43.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.43.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.43.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.44.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.44.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.44.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.45.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.45.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.45.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.46.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.46.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.46.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.47.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.47.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.47.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.48.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.48.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.48.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.49.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.49.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.49.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.50.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.50.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.50.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.51.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.51.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.51.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.52.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.52.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.52.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.53.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.53.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.53.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.54.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.54.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.54.w3.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.55.w1.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.55.w2.weight": "model-00135-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.55.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.56.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.56.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.56.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.57.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.57.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.57.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.58.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.58.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.58.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.59.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.59.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.59.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.60.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.60.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.60.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.61.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.61.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.61.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.62.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.62.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.62.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.63.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.63.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.63.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.64.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.64.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.64.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.65.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.65.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.65.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.66.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.66.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.66.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.67.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.67.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.67.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.68.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.68.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.68.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.69.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.69.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.69.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.70.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.70.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.70.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.71.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.71.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.71.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.72.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.72.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.72.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.73.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.73.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.73.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.74.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.74.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.74.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.75.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.75.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.75.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.76.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.76.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.76.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.77.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.77.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.77.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.78.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.78.w2.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.78.w3.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.79.w1.weight": "model-00136-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.79.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.79.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.8.w1.weight": "model-00133-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.8.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.8.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.80.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.80.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.80.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.81.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.81.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.81.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.82.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.82.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.82.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.83.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.83.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.83.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.84.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.84.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.84.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.85.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.85.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.85.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.86.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.86.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.86.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.87.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.87.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.87.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.88.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.88.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.88.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.89.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.89.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.89.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.9.w1.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.9.w2.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.9.w3.weight": "model-00134-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.90.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.90.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.90.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.91.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.91.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.91.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.92.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.92.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.92.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.93.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.93.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.93.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.94.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.94.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.94.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.95.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.95.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.95.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.96.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.96.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.96.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.97.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.97.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.97.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.98.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.98.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.98.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.99.w1.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.99.w2.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.experts.99.w3.weight": "model-00137-of-00194.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00133-of-00194.safetensors", + "model.layers.24.input_layernorm.weight": "model-00139-of-00194.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00139-of-00194.safetensors", + "model.layers.24.residual_layernorm.weight": "model-00139-of-00194.safetensors", + "model.layers.24.residual_mlp.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.24.residual_mlp.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.24.residual_mlp.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00133-of-00194.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00133-of-00194.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00133-of-00194.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00133-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.10.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.10.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.10.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.100.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.100.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.100.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.101.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.101.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.101.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.102.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.102.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.102.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.103.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.103.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.103.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.104.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.104.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.104.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.105.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.105.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.105.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.106.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.106.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.106.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.107.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.107.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.107.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.108.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.108.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.108.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.109.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.109.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.109.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.11.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.11.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.11.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.110.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.110.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.110.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.111.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.111.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.111.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.112.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.112.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.112.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.113.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.113.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.113.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.114.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.114.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.114.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.115.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.115.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.115.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.116.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.116.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.116.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.117.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.117.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.117.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.118.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.118.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.118.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.119.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.119.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.119.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.12.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.12.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.12.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.120.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.120.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.120.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.121.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.121.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.121.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.122.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.122.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.122.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.123.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.123.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.123.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.124.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.124.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.124.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.125.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.125.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.125.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.126.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.126.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.126.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.127.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.127.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.127.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.13.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.13.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.13.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.14.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.14.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.14.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.15.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.15.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.15.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.16.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.16.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.16.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.17.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.17.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.17.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.18.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.18.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.18.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.19.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.19.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.19.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.20.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.20.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.20.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.21.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.21.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.21.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.22.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.22.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.22.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.23.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.23.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.23.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.24.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.24.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.24.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.25.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.25.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.25.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.26.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.26.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.26.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.27.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.27.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.27.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.28.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.28.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.28.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.29.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.29.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.29.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.30.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.30.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.30.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.31.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.31.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.31.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.32.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.32.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.32.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.33.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.33.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.33.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.34.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.34.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.34.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.35.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.35.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.35.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.36.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.36.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.36.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.37.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.37.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.37.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.38.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.38.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.38.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.39.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.39.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.39.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.40.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.40.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.40.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.41.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.41.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.41.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.42.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.42.w2.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.42.w3.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.43.w1.weight": "model-00140-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.43.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.43.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.44.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.44.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.44.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.45.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.45.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.45.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.46.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.46.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.46.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.47.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.47.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.47.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.48.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.48.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.48.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.49.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.49.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.49.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.50.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.50.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.50.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.51.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.51.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.51.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.52.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.52.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.52.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.53.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.53.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.53.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.54.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.54.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.54.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.55.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.55.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.55.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.56.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.56.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.56.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.57.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.57.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.57.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.58.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.58.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.58.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.59.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.59.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.59.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.60.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.60.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.60.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.61.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.61.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.61.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.62.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.62.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.62.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.63.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.63.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.63.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.64.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.64.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.64.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.65.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.65.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.65.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.66.w1.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.66.w2.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.66.w3.weight": "model-00141-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.67.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.67.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.67.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.68.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.68.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.68.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.69.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.69.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.69.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.70.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.70.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.70.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.71.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.71.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.71.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.72.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.72.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.72.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.73.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.73.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.73.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.74.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.74.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.74.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.75.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.75.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.75.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.76.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.76.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.76.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.77.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.77.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.77.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.78.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.78.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.78.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.79.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.79.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.79.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.8.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.8.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.8.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.80.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.80.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.80.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.81.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.81.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.81.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.82.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.82.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.82.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.83.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.83.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.83.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.84.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.84.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.84.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.85.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.85.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.85.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.86.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.86.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.86.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.87.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.87.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.87.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.88.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.88.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.88.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.89.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.89.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.89.w3.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.9.w1.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.9.w2.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.9.w3.weight": "model-00139-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.90.w1.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.90.w2.weight": "model-00142-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.90.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.91.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.91.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.91.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.92.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.92.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.92.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.93.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.93.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.93.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.94.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.94.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.94.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.95.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.95.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.95.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.96.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.96.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.96.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.97.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.97.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.97.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.98.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.98.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.98.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.99.w1.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.99.w2.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.experts.99.w3.weight": "model-00143-of-00194.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00139-of-00194.safetensors", + "model.layers.25.input_layernorm.weight": "model-00144-of-00194.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00144-of-00194.safetensors", + "model.layers.25.residual_layernorm.weight": "model-00144-of-00194.safetensors", + "model.layers.25.residual_mlp.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.25.residual_mlp.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.25.residual_mlp.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00139-of-00194.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00139-of-00194.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00139-of-00194.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00139-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.10.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.10.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.10.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.100.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.100.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.100.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.101.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.101.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.101.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.102.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.102.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.102.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.103.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.103.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.103.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.104.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.104.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.104.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.105.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.105.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.105.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.106.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.106.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.106.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.107.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.107.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.107.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.108.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.108.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.108.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.109.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.109.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.109.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.11.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.11.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.11.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.110.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.110.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.110.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.111.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.111.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.111.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.112.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.112.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.112.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.113.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.113.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.113.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.114.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.114.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.114.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.115.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.115.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.115.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.116.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.116.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.116.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.117.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.117.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.117.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.118.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.118.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.118.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.119.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.119.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.119.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.12.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.12.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.12.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.120.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.120.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.120.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.121.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.121.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.121.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.122.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.122.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.122.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.123.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.123.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.123.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.124.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.124.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.124.w3.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.125.w1.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.125.w2.weight": "model-00149-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.125.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.126.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.126.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.126.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.127.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.127.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.127.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.13.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.13.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.13.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.14.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.14.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.14.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.15.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.15.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.15.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.16.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.16.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.16.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.17.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.17.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.17.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.18.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.18.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.18.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.19.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.19.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.19.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.20.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.20.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.20.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.21.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.21.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.21.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.22.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.22.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.22.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.23.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.23.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.23.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.24.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.24.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.24.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.25.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.25.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.25.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.26.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.26.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.26.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.27.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.27.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.27.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.28.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.28.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.28.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.29.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.29.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.29.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.30.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.30.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.30.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.31.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.31.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.31.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.32.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.32.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.32.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.33.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.33.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.33.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.34.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.34.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.34.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.35.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.35.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.35.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.36.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.36.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.36.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.37.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.37.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.37.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.38.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.38.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.38.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.39.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.39.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.39.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.40.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.40.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.40.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.41.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.41.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.41.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.42.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.42.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.42.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.43.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.43.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.43.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.44.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.44.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.44.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.45.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.45.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.45.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.46.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.46.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.46.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.47.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.47.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.47.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.48.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.48.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.48.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.49.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.49.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.49.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.50.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.50.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.50.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.51.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.51.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.51.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.52.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.52.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.52.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.53.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.53.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.53.w3.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.54.w1.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.54.w2.weight": "model-00146-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.54.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.55.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.55.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.55.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.56.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.56.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.56.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.57.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.57.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.57.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.58.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.58.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.58.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.59.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.59.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.59.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.60.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.60.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.60.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.61.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.61.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.61.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.62.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.62.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.62.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.63.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.63.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.63.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.64.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.64.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.64.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.65.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.65.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.65.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.66.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.66.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.66.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.67.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.67.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.67.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.68.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.68.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.68.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.69.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.69.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.69.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00144-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.70.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.70.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.70.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.71.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.71.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.71.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.72.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.72.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.72.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.73.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.73.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.73.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.74.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.74.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.74.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.75.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.75.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.75.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.76.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.76.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.76.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.77.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.77.w2.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.77.w3.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.78.w1.weight": "model-00147-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.78.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.78.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.79.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.79.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.79.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.8.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.8.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.8.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.80.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.80.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.80.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.81.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.81.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.81.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.82.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.82.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.82.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.83.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.83.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.83.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.84.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.84.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.84.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.85.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.85.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.85.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.86.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.86.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.86.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.87.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.87.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.87.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.88.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.88.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.88.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.89.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.89.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.89.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.9.w1.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.9.w2.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.9.w3.weight": "model-00145-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.90.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.90.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.90.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.91.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.91.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.91.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.92.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.92.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.92.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.93.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.93.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.93.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.94.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.94.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.94.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.95.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.95.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.95.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.96.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.96.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.96.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.97.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.97.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.97.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.98.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.98.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.98.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.99.w1.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.99.w2.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.experts.99.w3.weight": "model-00148-of-00194.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00144-of-00194.safetensors", + "model.layers.26.input_layernorm.weight": "model-00150-of-00194.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00150-of-00194.safetensors", + "model.layers.26.residual_layernorm.weight": "model-00150-of-00194.safetensors", + "model.layers.26.residual_mlp.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.26.residual_mlp.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.26.residual_mlp.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00144-of-00194.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00144-of-00194.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00144-of-00194.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00144-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.10.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.10.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.10.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.100.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.100.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.100.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.101.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.101.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.101.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.102.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.102.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.102.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.103.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.103.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.103.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.104.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.104.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.104.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.105.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.105.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.105.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.106.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.106.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.106.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.107.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.107.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.107.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.108.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.108.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.108.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.109.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.109.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.109.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.11.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.11.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.11.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.110.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.110.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.110.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.111.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.111.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.111.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.112.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.112.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.112.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.113.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.113.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.113.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.114.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.114.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.114.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.115.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.115.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.115.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.116.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.116.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.116.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.117.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.117.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.117.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.118.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.118.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.118.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.119.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.119.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.119.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.12.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.12.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.12.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.120.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.120.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.120.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.121.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.121.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.121.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.122.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.122.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.122.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.123.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.123.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.123.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.124.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.124.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.124.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.125.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.125.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.125.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.126.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.126.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.126.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.127.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.127.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.127.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.13.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.13.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.13.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.14.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.14.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.14.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.15.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.15.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.15.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.16.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.16.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.16.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.17.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.17.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.17.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.18.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.18.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.18.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.19.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.19.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.19.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.20.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.20.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.20.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.21.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.21.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.21.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.22.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.22.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.22.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.23.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.23.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.23.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.24.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.24.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.24.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.25.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.25.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.25.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.26.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.26.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.26.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.27.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.27.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.27.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.28.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.28.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.28.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.29.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.29.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.29.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.30.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.30.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.30.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.31.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.31.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.31.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.32.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.32.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.32.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.33.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.33.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.33.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.34.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.34.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.34.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.35.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.35.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.35.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.36.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.36.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.36.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.37.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.37.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.37.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.38.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.38.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.38.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.39.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.39.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.39.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.40.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.40.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.40.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.41.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.41.w2.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.41.w3.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.42.w1.weight": "model-00151-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.42.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.42.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.43.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.43.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.43.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.44.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.44.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.44.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.45.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.45.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.45.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.46.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.46.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.46.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.47.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.47.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.47.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.48.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.48.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.48.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.49.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.49.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.49.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.50.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.50.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.50.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.51.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.51.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.51.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.52.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.52.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.52.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.53.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.53.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.53.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.54.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.54.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.54.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.55.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.55.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.55.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.56.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.56.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.56.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.57.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.57.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.57.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.58.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.58.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.58.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.59.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.59.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.59.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.60.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.60.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.60.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.61.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.61.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.61.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.62.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.62.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.62.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.63.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.63.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.63.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.64.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.64.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.64.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.65.w1.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.65.w2.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.65.w3.weight": "model-00152-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.66.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.66.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.66.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.67.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.67.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.67.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.68.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.68.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.68.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.69.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.69.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.69.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.70.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.70.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.70.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.71.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.71.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.71.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.72.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.72.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.72.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.73.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.73.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.73.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.74.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.74.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.74.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.75.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.75.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.75.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.76.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.76.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.76.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.77.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.77.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.77.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.78.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.78.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.78.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.79.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.79.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.79.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.8.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.8.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.8.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.80.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.80.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.80.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.81.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.81.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.81.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.82.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.82.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.82.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.83.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.83.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.83.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.84.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.84.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.84.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.85.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.85.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.85.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.86.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.86.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.86.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.87.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.87.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.87.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.88.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.88.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.88.w3.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.89.w1.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.89.w2.weight": "model-00153-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.89.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.9.w1.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.9.w2.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.9.w3.weight": "model-00150-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.90.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.90.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.90.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.91.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.91.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.91.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.92.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.92.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.92.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.93.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.93.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.93.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.94.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.94.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.94.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.95.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.95.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.95.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.96.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.96.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.96.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.97.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.97.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.97.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.98.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.98.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.98.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.99.w1.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.99.w2.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.experts.99.w3.weight": "model-00154-of-00194.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00150-of-00194.safetensors", + "model.layers.27.input_layernorm.weight": "model-00155-of-00194.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00155-of-00194.safetensors", + "model.layers.27.residual_layernorm.weight": "model-00155-of-00194.safetensors", + "model.layers.27.residual_mlp.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.27.residual_mlp.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.27.residual_mlp.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00150-of-00194.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00150-of-00194.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00150-of-00194.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00150-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.10.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.10.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.10.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.100.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.100.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.100.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.101.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.101.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.101.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.102.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.102.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.102.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.103.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.103.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.103.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.104.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.104.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.104.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.105.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.105.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.105.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.106.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.106.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.106.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.107.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.107.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.107.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.108.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.108.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.108.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.109.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.109.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.109.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.11.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.11.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.11.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.110.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.110.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.110.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.111.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.111.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.111.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.112.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.112.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.112.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.113.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.113.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.113.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.114.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.114.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.114.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.115.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.115.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.115.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.116.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.116.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.116.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.117.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.117.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.117.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.118.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.118.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.118.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.119.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.119.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.119.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.12.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.12.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.12.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.120.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.120.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.120.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.121.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.121.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.121.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.122.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.122.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.122.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.123.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.123.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.123.w3.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.124.w1.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.124.w2.weight": "model-00160-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.124.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.125.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.125.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.125.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.126.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.126.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.126.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.127.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.127.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.127.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.13.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.13.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.13.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.14.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.14.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.14.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.15.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.15.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.15.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.16.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.16.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.16.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.17.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.17.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.17.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.18.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.18.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.18.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.19.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.19.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.19.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.20.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.20.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.20.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.21.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.21.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.21.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.22.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.22.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.22.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.23.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.23.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.23.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.24.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.24.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.24.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.25.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.25.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.25.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.26.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.26.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.26.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.27.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.27.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.27.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.28.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.28.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.28.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.29.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.29.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.29.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.30.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.30.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.30.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.31.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.31.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.31.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.32.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.32.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.32.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.33.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.33.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.33.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.34.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.34.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.34.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.35.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.35.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.35.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.36.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.36.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.36.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.37.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.37.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.37.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.38.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.38.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.38.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.39.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.39.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.39.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.40.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.40.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.40.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.41.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.41.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.41.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.42.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.42.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.42.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.43.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.43.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.43.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.44.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.44.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.44.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.45.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.45.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.45.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.46.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.46.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.46.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.47.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.47.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.47.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.48.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.48.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.48.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.49.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.49.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.49.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.50.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.50.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.50.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.51.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.51.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.51.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.52.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.52.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.52.w3.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.53.w1.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.53.w2.weight": "model-00157-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.53.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.54.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.54.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.54.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.55.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.55.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.55.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.56.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.56.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.56.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.57.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.57.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.57.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.58.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.58.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.58.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.59.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.59.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.59.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00155-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.60.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.60.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.60.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.61.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.61.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.61.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.62.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.62.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.62.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.63.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.63.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.63.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.64.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.64.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.64.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.65.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.65.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.65.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.66.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.66.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.66.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.67.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.67.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.67.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.68.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.68.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.68.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.69.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.69.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.69.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.70.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.70.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.70.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.71.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.71.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.71.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.72.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.72.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.72.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.73.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.73.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.73.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.74.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.74.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.74.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.75.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.75.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.75.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.76.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.76.w2.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.76.w3.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.77.w1.weight": "model-00158-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.77.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.77.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.78.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.78.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.78.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.79.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.79.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.79.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.8.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.8.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.8.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.80.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.80.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.80.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.81.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.81.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.81.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.82.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.82.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.82.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.83.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.83.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.83.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.84.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.84.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.84.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.85.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.85.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.85.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.86.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.86.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.86.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.87.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.87.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.87.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.88.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.88.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.88.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.89.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.89.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.89.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.9.w1.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.9.w2.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.9.w3.weight": "model-00156-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.90.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.90.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.90.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.91.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.91.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.91.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.92.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.92.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.92.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.93.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.93.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.93.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.94.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.94.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.94.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.95.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.95.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.95.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.96.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.96.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.96.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.97.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.97.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.97.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.98.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.98.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.98.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.99.w1.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.99.w2.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.experts.99.w3.weight": "model-00159-of-00194.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00155-of-00194.safetensors", + "model.layers.28.input_layernorm.weight": "model-00161-of-00194.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00161-of-00194.safetensors", + "model.layers.28.residual_layernorm.weight": "model-00161-of-00194.safetensors", + "model.layers.28.residual_mlp.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.28.residual_mlp.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.28.residual_mlp.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00155-of-00194.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00155-of-00194.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00155-of-00194.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00155-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.10.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.10.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.10.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.100.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.100.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.100.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.101.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.101.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.101.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.102.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.102.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.102.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.103.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.103.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.103.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.104.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.104.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.104.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.105.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.105.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.105.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.106.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.106.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.106.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.107.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.107.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.107.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.108.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.108.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.108.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.109.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.109.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.109.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.11.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.11.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.11.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.110.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.110.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.110.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.111.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.111.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.111.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.112.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.112.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.112.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.113.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.113.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.113.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.114.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.114.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.114.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.115.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.115.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.115.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.116.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.116.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.116.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.117.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.117.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.117.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.118.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.118.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.118.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.119.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.119.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.119.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.12.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.12.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.12.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.120.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.120.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.120.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.121.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.121.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.121.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.122.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.122.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.122.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.123.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.123.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.123.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.124.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.124.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.124.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.125.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.125.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.125.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.126.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.126.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.126.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.127.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.127.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.127.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.13.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.13.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.13.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.14.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.14.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.14.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.15.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.15.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.15.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.16.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.16.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.16.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.17.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.17.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.17.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.18.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.18.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.18.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.19.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.19.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.19.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.20.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.20.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.20.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.21.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.21.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.21.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.22.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.22.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.22.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.23.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.23.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.23.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.24.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.24.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.24.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.25.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.25.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.25.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.26.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.26.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.26.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.27.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.27.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.27.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.28.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.28.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.28.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.29.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.29.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.29.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.30.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.30.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.30.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.31.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.31.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.31.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.32.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.32.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.32.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.33.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.33.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.33.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.34.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.34.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.34.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.35.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.35.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.35.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.36.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.36.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.36.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.37.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.37.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.37.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.38.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.38.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.38.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.39.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.39.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.39.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.40.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.40.w2.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.40.w3.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.41.w1.weight": "model-00162-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.41.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.41.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.42.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.42.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.42.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.43.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.43.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.43.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.44.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.44.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.44.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.45.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.45.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.45.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.46.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.46.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.46.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.47.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.47.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.47.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.48.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.48.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.48.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.49.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.49.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.49.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.50.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.50.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.50.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.51.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.51.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.51.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.52.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.52.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.52.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.53.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.53.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.53.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.54.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.54.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.54.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.55.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.55.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.55.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.56.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.56.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.56.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.57.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.57.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.57.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.58.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.58.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.58.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.59.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.59.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.59.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.60.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.60.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.60.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.61.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.61.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.61.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.62.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.62.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.62.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.63.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.63.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.63.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.64.w1.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.64.w2.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.64.w3.weight": "model-00163-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.65.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.65.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.65.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.66.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.66.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.66.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.67.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.67.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.67.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.68.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.68.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.68.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.69.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.69.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.69.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.70.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.70.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.70.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.71.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.71.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.71.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.72.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.72.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.72.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.73.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.73.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.73.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.74.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.74.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.74.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.75.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.75.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.75.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.76.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.76.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.76.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.77.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.77.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.77.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.78.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.78.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.78.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.79.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.79.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.79.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.8.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.8.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.8.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.80.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.80.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.80.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.81.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.81.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.81.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.82.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.82.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.82.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.83.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.83.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.83.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.84.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.84.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.84.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.85.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.85.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.85.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.86.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.86.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.86.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.87.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.87.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.87.w3.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.88.w1.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.88.w2.weight": "model-00164-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.88.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.89.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.89.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.89.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.9.w1.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.9.w2.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.9.w3.weight": "model-00161-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.90.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.90.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.90.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.91.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.91.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.91.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.92.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.92.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.92.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.93.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.93.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.93.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.94.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.94.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.94.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.95.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.95.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.95.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.96.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.96.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.96.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.97.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.97.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.97.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.98.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.98.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.98.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.99.w1.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.99.w2.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.experts.99.w3.weight": "model-00165-of-00194.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00161-of-00194.safetensors", + "model.layers.29.input_layernorm.weight": "model-00166-of-00194.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00166-of-00194.safetensors", + "model.layers.29.residual_layernorm.weight": "model-00166-of-00194.safetensors", + "model.layers.29.residual_mlp.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.29.residual_mlp.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.29.residual_mlp.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00161-of-00194.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00161-of-00194.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00161-of-00194.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00161-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.100.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.100.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.100.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.101.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.101.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.101.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.102.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.102.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.102.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.103.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.103.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.103.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.104.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.104.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.104.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.105.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.105.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.105.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.106.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.106.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.106.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.107.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.107.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.107.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.108.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.108.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.108.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.109.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.109.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.109.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.110.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.110.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.110.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.111.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.111.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.111.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.112.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.112.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.112.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.113.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.113.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.113.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.114.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.114.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.114.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.115.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.115.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.115.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.116.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.116.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.116.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.117.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.117.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.117.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.118.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.118.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.118.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.119.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.119.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.119.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.120.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.120.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.120.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.121.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.121.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.121.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.122.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.122.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.122.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.123.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.123.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.123.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.124.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.124.w2.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.124.w3.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.125.w1.weight": "model-00022-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.125.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.125.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.126.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.126.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.126.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.127.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.127.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.127.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w2.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w3.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w1.weight": "model-00019-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.64.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.64.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.64.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.65.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.65.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.65.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.66.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.66.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.66.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.67.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.67.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.67.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.68.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.68.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.68.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.69.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.69.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.69.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.70.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.70.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.70.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.71.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.71.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.71.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.72.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.72.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.72.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.73.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.73.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.73.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.74.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.74.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.74.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.75.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.75.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.75.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.76.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.76.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.76.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.77.w1.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.77.w2.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.77.w3.weight": "model-00020-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.78.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.78.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.78.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.79.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.79.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.79.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.80.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.80.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.80.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.81.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.81.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.81.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.82.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.82.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.82.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.83.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.83.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.83.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.84.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.84.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.84.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.85.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.85.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.85.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.86.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.86.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.86.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.87.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.87.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.87.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.88.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.88.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.88.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.89.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.89.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.89.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w1.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w2.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w3.weight": "model-00018-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.90.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.90.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.90.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.91.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.91.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.91.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.92.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.92.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.92.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.93.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.93.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.93.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.94.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.94.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.94.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.95.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.95.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.95.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.96.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.96.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.96.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.97.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.97.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.97.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.98.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.98.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.98.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.99.w1.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.99.w2.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.experts.99.w3.weight": "model-00021-of-00194.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00017-of-00194.safetensors", + "model.layers.3.input_layernorm.weight": "model-00023-of-00194.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00023-of-00194.safetensors", + "model.layers.3.residual_layernorm.weight": "model-00023-of-00194.safetensors", + "model.layers.3.residual_mlp.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.3.residual_mlp.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.3.residual_mlp.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00017-of-00194.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00017-of-00194.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00017-of-00194.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00017-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.10.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.10.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.10.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.100.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.100.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.100.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.101.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.101.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.101.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.102.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.102.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.102.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.103.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.103.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.103.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.104.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.104.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.104.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.105.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.105.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.105.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.106.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.106.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.106.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.107.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.107.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.107.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.108.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.108.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.108.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.109.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.109.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.109.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.11.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.11.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.11.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.110.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.110.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.110.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.111.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.111.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.111.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.112.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.112.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.112.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.113.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.113.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.113.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.114.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.114.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.114.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.115.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.115.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.115.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.116.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.116.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.116.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.117.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.117.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.117.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.118.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.118.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.118.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.119.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.119.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.119.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.12.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.12.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.12.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.120.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.120.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.120.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.121.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.121.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.121.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.122.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.122.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.122.w3.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.123.w1.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.123.w2.weight": "model-00171-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.123.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.124.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.124.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.124.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.125.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.125.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.125.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.126.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.126.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.126.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.127.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.127.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.127.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.13.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.13.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.13.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.14.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.14.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.14.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.15.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.15.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.15.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.16.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.16.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.16.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.17.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.17.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.17.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.18.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.18.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.18.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.19.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.19.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.19.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.20.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.20.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.20.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.21.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.21.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.21.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.22.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.22.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.22.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.23.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.23.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.23.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.24.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.24.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.24.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.25.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.25.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.25.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.26.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.26.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.26.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.27.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.27.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.27.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.28.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.28.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.28.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.29.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.29.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.29.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.30.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.30.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.30.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.31.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.31.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.31.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.32.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.32.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.32.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.33.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.33.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.33.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.34.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.34.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.34.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.35.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.35.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.35.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.36.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.36.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.36.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.37.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.37.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.37.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.38.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.38.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.38.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.39.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.39.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.39.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.40.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.40.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.40.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.41.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.41.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.41.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.42.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.42.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.42.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.43.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.43.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.43.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.44.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.44.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.44.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.45.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.45.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.45.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.46.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.46.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.46.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.47.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.47.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.47.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.48.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.48.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.48.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.49.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.49.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.49.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00166-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.50.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.50.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.50.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.51.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.51.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.51.w3.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.52.w1.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.52.w2.weight": "model-00168-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.52.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.53.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.53.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.53.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.54.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.54.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.54.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.55.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.55.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.55.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.56.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.56.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.56.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.57.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.57.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.57.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.58.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.58.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.58.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.59.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.59.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.59.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.60.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.60.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.60.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.61.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.61.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.61.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.62.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.62.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.62.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.63.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.63.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.63.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.64.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.64.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.64.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.65.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.65.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.65.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.66.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.66.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.66.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.67.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.67.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.67.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.68.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.68.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.68.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.69.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.69.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.69.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.70.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.70.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.70.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.71.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.71.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.71.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.72.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.72.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.72.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.73.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.73.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.73.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.74.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.74.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.74.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.75.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.75.w2.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.75.w3.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.76.w1.weight": "model-00169-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.76.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.76.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.77.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.77.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.77.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.78.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.78.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.78.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.79.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.79.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.79.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.8.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.8.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.8.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.80.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.80.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.80.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.81.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.81.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.81.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.82.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.82.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.82.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.83.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.83.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.83.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.84.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.84.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.84.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.85.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.85.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.85.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.86.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.86.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.86.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.87.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.87.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.87.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.88.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.88.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.88.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.89.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.89.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.89.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.9.w1.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.9.w2.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.9.w3.weight": "model-00167-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.90.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.90.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.90.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.91.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.91.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.91.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.92.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.92.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.92.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.93.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.93.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.93.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.94.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.94.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.94.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.95.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.95.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.95.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.96.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.96.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.96.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.97.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.97.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.97.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.98.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.98.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.98.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.99.w1.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.99.w2.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.experts.99.w3.weight": "model-00170-of-00194.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00166-of-00194.safetensors", + "model.layers.30.input_layernorm.weight": "model-00172-of-00194.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00172-of-00194.safetensors", + "model.layers.30.residual_layernorm.weight": "model-00172-of-00194.safetensors", + "model.layers.30.residual_mlp.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.30.residual_mlp.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.30.residual_mlp.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00166-of-00194.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00166-of-00194.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00166-of-00194.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00166-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.10.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.10.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.10.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.100.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.100.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.100.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.101.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.101.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.101.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.102.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.102.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.102.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.103.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.103.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.103.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.104.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.104.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.104.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.105.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.105.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.105.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.106.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.106.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.106.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.107.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.107.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.107.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.108.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.108.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.108.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.109.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.109.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.109.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.11.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.11.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.11.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.110.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.110.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.110.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.111.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.111.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.111.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.112.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.112.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.112.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.113.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.113.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.113.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.114.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.114.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.114.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.115.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.115.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.115.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.116.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.116.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.116.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.117.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.117.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.117.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.118.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.118.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.118.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.119.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.119.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.119.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.12.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.12.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.12.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.120.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.120.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.120.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.121.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.121.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.121.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.122.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.122.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.122.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.123.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.123.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.123.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.124.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.124.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.124.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.125.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.125.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.125.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.126.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.126.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.126.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.127.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.127.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.127.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.13.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.13.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.13.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.14.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.14.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.14.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.15.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.15.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.15.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.16.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.16.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.16.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.17.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.17.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.17.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.18.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.18.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.18.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.19.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.19.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.19.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.20.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.20.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.20.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.21.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.21.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.21.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.22.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.22.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.22.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.23.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.23.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.23.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.24.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.24.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.24.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.25.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.25.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.25.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.26.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.26.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.26.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.27.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.27.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.27.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.28.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.28.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.28.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.29.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.29.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.29.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.30.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.30.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.30.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.31.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.31.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.31.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.32.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.32.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.32.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.33.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.33.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.33.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.34.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.34.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.34.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.35.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.35.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.35.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.36.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.36.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.36.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.37.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.37.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.37.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.38.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.38.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.38.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.39.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.39.w2.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.39.w3.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.40.w1.weight": "model-00173-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.40.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.40.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.41.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.41.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.41.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.42.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.42.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.42.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.43.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.43.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.43.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.44.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.44.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.44.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.45.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.45.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.45.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.46.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.46.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.46.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.47.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.47.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.47.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.48.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.48.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.48.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.49.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.49.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.49.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.50.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.50.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.50.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.51.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.51.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.51.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.52.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.52.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.52.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.53.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.53.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.53.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.54.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.54.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.54.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.55.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.55.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.55.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.56.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.56.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.56.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.57.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.57.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.57.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.58.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.58.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.58.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.59.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.59.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.59.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.60.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.60.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.60.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.61.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.61.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.61.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.62.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.62.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.62.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.63.w1.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.63.w2.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.63.w3.weight": "model-00174-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.64.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.64.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.64.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.65.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.65.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.65.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.66.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.66.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.66.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.67.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.67.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.67.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.68.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.68.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.68.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.69.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.69.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.69.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.70.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.70.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.70.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.71.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.71.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.71.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.72.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.72.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.72.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.73.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.73.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.73.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.74.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.74.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.74.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.75.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.75.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.75.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.76.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.76.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.76.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.77.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.77.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.77.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.78.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.78.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.78.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.79.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.79.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.79.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.8.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.8.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.8.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.80.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.80.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.80.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.81.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.81.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.81.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.82.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.82.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.82.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.83.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.83.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.83.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.84.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.84.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.84.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.85.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.85.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.85.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.86.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.86.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.86.w3.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.87.w1.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.87.w2.weight": "model-00175-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.87.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.88.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.88.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.88.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.89.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.89.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.89.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.9.w1.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.9.w2.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.9.w3.weight": "model-00172-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.90.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.90.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.90.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.91.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.91.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.91.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.92.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.92.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.92.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.93.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.93.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.93.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.94.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.94.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.94.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.95.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.95.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.95.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.96.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.96.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.96.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.97.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.97.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.97.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.98.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.98.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.98.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.99.w1.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.99.w2.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.experts.99.w3.weight": "model-00176-of-00194.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00172-of-00194.safetensors", + "model.layers.31.input_layernorm.weight": "model-00177-of-00194.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00177-of-00194.safetensors", + "model.layers.31.residual_layernorm.weight": "model-00177-of-00194.safetensors", + "model.layers.31.residual_mlp.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.31.residual_mlp.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.31.residual_mlp.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00172-of-00194.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00172-of-00194.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00172-of-00194.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00172-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.10.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.10.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.10.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.100.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.100.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.100.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.101.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.101.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.101.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.102.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.102.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.102.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.103.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.103.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.103.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.104.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.104.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.104.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.105.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.105.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.105.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.106.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.106.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.106.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.107.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.107.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.107.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.108.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.108.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.108.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.109.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.109.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.109.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.11.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.11.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.11.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.110.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.110.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.110.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.111.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.111.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.111.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.112.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.112.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.112.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.113.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.113.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.113.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.114.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.114.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.114.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.115.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.115.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.115.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.116.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.116.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.116.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.117.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.117.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.117.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.118.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.118.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.118.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.119.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.119.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.119.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.12.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.12.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.12.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.120.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.120.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.120.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.121.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.121.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.121.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.122.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.122.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.122.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.123.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.123.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.123.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.124.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.124.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.124.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.125.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.125.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.125.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.126.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.126.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.126.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.127.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.127.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.127.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.13.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.13.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.13.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.14.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.14.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.14.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.15.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.15.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.15.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.16.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.16.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.16.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.17.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.17.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.17.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.18.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.18.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.18.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.19.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.19.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.19.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.20.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.20.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.20.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.21.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.21.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.21.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.22.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.22.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.22.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.23.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.23.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.23.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.24.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.24.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.24.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.25.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.25.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.25.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.26.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.26.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.26.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.27.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.27.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.27.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.28.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.28.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.28.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.29.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.29.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.29.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.30.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.30.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.30.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.31.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.31.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.31.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.32.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.32.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.32.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.33.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.33.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.33.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.34.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.34.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.34.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.35.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.35.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.35.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.36.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.36.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.36.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.37.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.37.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.37.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.38.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.38.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.38.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.39.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.39.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.39.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00177-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.40.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.40.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.40.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.41.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.41.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.41.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.42.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.42.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.42.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.43.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.43.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.43.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.44.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.44.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.44.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.45.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.45.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.45.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.46.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.46.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.46.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.47.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.47.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.47.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.48.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.48.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.48.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.49.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.49.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.49.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.50.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.50.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.50.w3.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.51.w1.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.51.w2.weight": "model-00179-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.51.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.52.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.52.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.52.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.53.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.53.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.53.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.54.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.54.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.54.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.55.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.55.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.55.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.56.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.56.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.56.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.57.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.57.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.57.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.58.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.58.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.58.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.59.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.59.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.59.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.60.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.60.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.60.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.61.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.61.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.61.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.62.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.62.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.62.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.63.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.63.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.63.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.64.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.64.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.64.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.65.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.65.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.65.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.66.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.66.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.66.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.67.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.67.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.67.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.68.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.68.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.68.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.69.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.69.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.69.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.70.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.70.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.70.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.71.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.71.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.71.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.72.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.72.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.72.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.73.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.73.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.73.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.74.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.74.w2.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.74.w3.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.75.w1.weight": "model-00180-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.75.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.75.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.76.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.76.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.76.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.77.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.77.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.77.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.78.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.78.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.78.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.79.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.79.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.79.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.8.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.8.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.8.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.80.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.80.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.80.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.81.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.81.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.81.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.82.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.82.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.82.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.83.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.83.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.83.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.84.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.84.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.84.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.85.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.85.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.85.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.86.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.86.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.86.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.87.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.87.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.87.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.88.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.88.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.88.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.89.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.89.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.89.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.9.w1.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.9.w2.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.9.w3.weight": "model-00178-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.90.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.90.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.90.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.91.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.91.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.91.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.92.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.92.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.92.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.93.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.93.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.93.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.94.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.94.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.94.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.95.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.95.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.95.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.96.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.96.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.96.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.97.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.97.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.97.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.98.w1.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.98.w2.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.98.w3.weight": "model-00181-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.99.w1.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.99.w2.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.experts.99.w3.weight": "model-00182-of-00194.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00177-of-00194.safetensors", + "model.layers.32.input_layernorm.weight": "model-00183-of-00194.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00183-of-00194.safetensors", + "model.layers.32.residual_layernorm.weight": "model-00183-of-00194.safetensors", + "model.layers.32.residual_mlp.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.32.residual_mlp.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.32.residual_mlp.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00177-of-00194.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00177-of-00194.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00177-of-00194.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00177-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.10.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.10.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.10.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.100.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.100.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.100.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.101.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.101.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.101.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.102.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.102.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.102.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.103.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.103.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.103.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.104.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.104.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.104.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.105.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.105.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.105.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.106.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.106.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.106.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.107.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.107.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.107.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.108.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.108.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.108.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.109.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.109.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.109.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.11.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.11.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.11.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.110.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.110.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.110.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.111.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.111.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.111.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.112.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.112.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.112.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.113.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.113.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.113.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.114.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.114.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.114.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.115.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.115.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.115.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.116.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.116.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.116.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.117.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.117.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.117.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.118.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.118.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.118.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.119.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.119.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.119.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.12.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.12.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.12.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.120.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.120.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.120.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.121.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.121.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.121.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.122.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.122.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.122.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.123.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.123.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.123.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.124.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.124.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.124.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.125.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.125.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.125.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.126.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.126.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.126.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.127.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.127.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.127.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.13.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.13.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.13.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.14.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.14.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.14.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.15.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.15.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.15.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.16.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.16.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.16.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.17.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.17.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.17.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.18.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.18.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.18.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.19.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.19.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.19.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.20.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.20.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.20.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.21.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.21.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.21.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.22.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.22.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.22.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.23.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.23.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.23.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.24.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.24.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.24.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.25.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.25.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.25.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.26.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.26.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.26.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.27.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.27.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.27.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.28.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.28.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.28.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.29.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.29.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.29.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.30.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.30.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.30.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.31.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.31.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.31.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.32.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.32.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.32.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.33.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.33.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.33.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.34.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.34.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.34.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.35.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.35.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.35.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.36.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.36.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.36.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.37.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.37.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.37.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.38.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.38.w2.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.38.w3.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.39.w1.weight": "model-00184-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.39.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.39.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.40.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.40.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.40.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.41.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.41.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.41.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.42.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.42.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.42.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.43.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.43.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.43.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.44.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.44.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.44.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.45.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.45.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.45.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.46.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.46.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.46.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.47.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.47.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.47.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.48.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.48.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.48.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.49.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.49.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.49.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.50.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.50.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.50.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.51.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.51.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.51.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.52.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.52.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.52.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.53.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.53.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.53.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.54.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.54.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.54.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.55.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.55.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.55.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.56.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.56.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.56.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.57.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.57.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.57.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.58.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.58.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.58.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.59.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.59.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.59.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.60.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.60.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.60.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.61.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.61.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.61.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.62.w1.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.62.w2.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.62.w3.weight": "model-00185-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.63.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.63.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.63.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.64.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.64.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.64.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.65.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.65.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.65.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.66.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.66.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.66.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.67.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.67.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.67.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.68.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.68.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.68.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.69.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.69.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.69.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.70.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.70.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.70.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.71.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.71.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.71.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.72.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.72.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.72.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.73.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.73.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.73.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.74.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.74.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.74.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.75.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.75.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.75.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.76.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.76.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.76.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.77.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.77.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.77.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.78.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.78.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.78.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.79.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.79.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.79.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.8.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.8.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.8.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.80.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.80.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.80.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.81.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.81.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.81.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.82.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.82.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.82.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.83.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.83.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.83.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.84.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.84.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.84.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.85.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.85.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.85.w3.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.86.w1.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.86.w2.weight": "model-00186-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.86.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.87.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.87.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.87.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.88.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.88.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.88.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.89.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.89.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.89.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.9.w1.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.9.w2.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.9.w3.weight": "model-00183-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.90.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.90.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.90.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.91.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.91.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.91.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.92.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.92.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.92.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.93.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.93.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.93.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.94.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.94.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.94.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.95.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.95.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.95.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.96.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.96.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.96.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.97.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.97.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.97.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.98.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.98.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.98.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.99.w1.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.99.w2.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.experts.99.w3.weight": "model-00187-of-00194.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00183-of-00194.safetensors", + "model.layers.33.input_layernorm.weight": "model-00188-of-00194.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00188-of-00194.safetensors", + "model.layers.33.residual_layernorm.weight": "model-00188-of-00194.safetensors", + "model.layers.33.residual_mlp.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.33.residual_mlp.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.33.residual_mlp.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00183-of-00194.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00183-of-00194.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00183-of-00194.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00183-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.10.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.10.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.10.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.100.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.100.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.100.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.101.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.101.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.101.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.102.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.102.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.102.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.103.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.103.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.103.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.104.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.104.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.104.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.105.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.105.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.105.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.106.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.106.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.106.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.107.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.107.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.107.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.108.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.108.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.108.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.109.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.109.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.109.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.11.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.11.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.11.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.110.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.110.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.110.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.111.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.111.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.111.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.112.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.112.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.112.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.113.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.113.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.113.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.114.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.114.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.114.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.115.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.115.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.115.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.116.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.116.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.116.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.117.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.117.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.117.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.118.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.118.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.118.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.119.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.119.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.119.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.12.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.12.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.12.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.120.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.120.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.120.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.121.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.121.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.121.w3.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.122.w1.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.122.w2.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.122.w3.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.123.w1.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.123.w2.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.123.w3.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.124.w1.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.124.w2.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.124.w3.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.125.w1.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.125.w2.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.125.w3.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.126.w1.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.126.w2.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.126.w3.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.127.w1.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.127.w2.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.127.w3.weight": "model-00194-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.13.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.13.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.13.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.14.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.14.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.14.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.15.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.15.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.15.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.16.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.16.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.16.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.17.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.17.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.17.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.18.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.18.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.18.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.19.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.19.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.19.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00188-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00188-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.20.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.20.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.20.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.21.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.21.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.21.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.22.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.22.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.22.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.23.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.23.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.23.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.24.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.24.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.24.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.25.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.25.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.25.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.26.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.26.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.26.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.27.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.27.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.27.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.28.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.28.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.28.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.29.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.29.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.29.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00188-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.30.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.30.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.30.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.31.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.31.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.31.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.32.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.32.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.32.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.33.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.33.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.33.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.34.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.34.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.34.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.35.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.35.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.35.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.36.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.36.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.36.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.37.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.37.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.37.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.38.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.38.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.38.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.39.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.39.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.39.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.40.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.40.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.40.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.41.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.41.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.41.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.42.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.42.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.42.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.43.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.43.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.43.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.44.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.44.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.44.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.45.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.45.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.45.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.46.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.46.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.46.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.47.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.47.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.47.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.48.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.48.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.48.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.49.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.49.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.49.w3.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.50.w1.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.50.w2.weight": "model-00190-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.50.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.51.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.51.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.51.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.52.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.52.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.52.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.53.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.53.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.53.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.54.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.54.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.54.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.55.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.55.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.55.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.56.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.56.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.56.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.57.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.57.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.57.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.58.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.58.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.58.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.59.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.59.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.59.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.60.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.60.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.60.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.61.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.61.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.61.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.62.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.62.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.62.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.63.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.63.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.63.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.64.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.64.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.64.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.65.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.65.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.65.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.66.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.66.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.66.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.67.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.67.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.67.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.68.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.68.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.68.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.69.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.69.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.69.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.70.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.70.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.70.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.71.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.71.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.71.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.72.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.72.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.72.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.73.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.73.w2.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.73.w3.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.74.w1.weight": "model-00191-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.74.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.74.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.75.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.75.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.75.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.76.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.76.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.76.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.77.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.77.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.77.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.78.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.78.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.78.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.79.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.79.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.79.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.8.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.8.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.8.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.80.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.80.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.80.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.81.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.81.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.81.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.82.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.82.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.82.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.83.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.83.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.83.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.84.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.84.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.84.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.85.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.85.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.85.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.86.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.86.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.86.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.87.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.87.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.87.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.88.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.88.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.88.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.89.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.89.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.89.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.9.w1.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.9.w2.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.9.w3.weight": "model-00189-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.90.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.90.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.90.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.91.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.91.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.91.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.92.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.92.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.92.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.93.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.93.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.93.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.94.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.94.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.94.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.95.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.95.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.95.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.96.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.96.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.96.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.97.w1.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.97.w2.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.97.w3.weight": "model-00192-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.98.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.98.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.98.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.99.w1.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.99.w2.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.experts.99.w3.weight": "model-00193-of-00194.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00188-of-00194.safetensors", + "model.layers.34.input_layernorm.weight": "model-00194-of-00194.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00194-of-00194.safetensors", + "model.layers.34.residual_layernorm.weight": "model-00194-of-00194.safetensors", + "model.layers.34.residual_mlp.w1.weight": "model-00194-of-00194.safetensors", + "model.layers.34.residual_mlp.w2.weight": "model-00194-of-00194.safetensors", + "model.layers.34.residual_mlp.w3.weight": "model-00194-of-00194.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00188-of-00194.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00188-of-00194.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00188-of-00194.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00188-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.100.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.100.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.100.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.101.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.101.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.101.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.102.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.102.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.102.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.103.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.103.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.103.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.104.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.104.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.104.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.105.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.105.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.105.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.106.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.106.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.106.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.107.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.107.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.107.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.108.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.108.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.108.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.109.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.109.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.109.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.110.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.110.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.110.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.111.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.111.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.111.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.112.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.112.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.112.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.113.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.113.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.113.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.114.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.114.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.114.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.115.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.115.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.115.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.116.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.116.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.116.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.117.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.117.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.117.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.118.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.118.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.118.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.119.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.119.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.119.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.120.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.120.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.120.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.121.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.121.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.121.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.122.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.122.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.122.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.123.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.123.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.123.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.124.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.124.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.124.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.125.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.125.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.125.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.126.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.126.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.126.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.127.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.127.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.127.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w1.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w2.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w3.weight": "model-00024-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.64.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.64.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.64.w3.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.65.w1.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.65.w2.weight": "model-00025-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.65.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.66.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.66.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.66.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.67.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.67.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.67.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.68.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.68.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.68.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.69.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.69.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.69.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.70.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.70.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.70.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.71.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.71.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.71.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.72.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.72.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.72.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.73.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.73.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.73.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.74.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.74.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.74.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.75.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.75.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.75.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.76.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.76.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.76.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.77.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.77.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.77.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.78.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.78.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.78.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.79.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.79.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.79.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.80.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.80.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.80.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.81.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.81.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.81.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.82.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.82.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.82.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.83.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.83.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.83.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.84.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.84.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.84.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.85.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.85.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.85.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.86.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.86.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.86.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.87.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.87.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.87.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.88.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.88.w2.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.88.w3.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.89.w1.weight": "model-00026-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.89.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.89.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w1.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w2.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w3.weight": "model-00023-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.90.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.90.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.90.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.91.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.91.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.91.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.92.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.92.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.92.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.93.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.93.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.93.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.94.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.94.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.94.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.95.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.95.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.95.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.96.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.96.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.96.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.97.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.97.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.97.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.98.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.98.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.98.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.99.w1.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.99.w2.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.experts.99.w3.weight": "model-00027-of-00194.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00023-of-00194.safetensors", + "model.layers.4.input_layernorm.weight": "model-00028-of-00194.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00028-of-00194.safetensors", + "model.layers.4.residual_layernorm.weight": "model-00028-of-00194.safetensors", + "model.layers.4.residual_mlp.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.4.residual_mlp.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.4.residual_mlp.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00023-of-00194.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00023-of-00194.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00023-of-00194.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00023-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.100.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.100.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.100.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.101.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.101.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.101.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.102.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.102.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.102.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.103.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.103.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.103.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.104.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.104.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.104.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.105.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.105.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.105.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.106.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.106.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.106.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.107.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.107.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.107.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.108.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.108.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.108.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.109.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.109.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.109.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.110.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.110.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.110.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.111.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.111.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.111.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.112.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.112.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.112.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.113.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.113.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.113.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.114.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.114.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.114.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.115.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.115.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.115.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.116.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.116.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.116.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.117.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.117.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.117.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.118.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.118.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.118.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.119.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.119.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.119.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.120.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.120.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.120.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.121.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.121.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.121.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.122.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.122.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.122.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.123.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.123.w2.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.123.w3.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.124.w1.weight": "model-00033-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.124.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.124.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.125.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.125.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.125.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.126.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.126.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.126.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.127.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.127.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.127.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w2.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w3.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w1.weight": "model-00030-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.64.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.64.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.64.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.65.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.65.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.65.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.66.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.66.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.66.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.67.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.67.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.67.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.68.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.68.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.68.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.69.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.69.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.69.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.70.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.70.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.70.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.71.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.71.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.71.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.72.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.72.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.72.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.73.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.73.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.73.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.74.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.74.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.74.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.75.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.75.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.75.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.76.w1.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.76.w2.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.76.w3.weight": "model-00031-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.77.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.77.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.77.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.78.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.78.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.78.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.79.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.79.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.79.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.80.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.80.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.80.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.81.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.81.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.81.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.82.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.82.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.82.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.83.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.83.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.83.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.84.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.84.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.84.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.85.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.85.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.85.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.86.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.86.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.86.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.87.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.87.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.87.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.88.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.88.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.88.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.89.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.89.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.89.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w1.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w2.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w3.weight": "model-00029-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.90.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.90.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.90.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.91.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.91.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.91.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.92.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.92.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.92.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.93.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.93.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.93.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.94.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.94.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.94.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.95.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.95.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.95.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.96.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.96.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.96.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.97.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.97.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.97.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.98.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.98.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.98.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.99.w1.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.99.w2.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.experts.99.w3.weight": "model-00032-of-00194.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00028-of-00194.safetensors", + "model.layers.5.input_layernorm.weight": "model-00034-of-00194.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00034-of-00194.safetensors", + "model.layers.5.residual_layernorm.weight": "model-00034-of-00194.safetensors", + "model.layers.5.residual_mlp.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.5.residual_mlp.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.5.residual_mlp.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00028-of-00194.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00028-of-00194.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00028-of-00194.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00028-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.100.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.100.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.100.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.101.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.101.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.101.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.102.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.102.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.102.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.103.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.103.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.103.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.104.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.104.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.104.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.105.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.105.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.105.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.106.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.106.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.106.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.107.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.107.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.107.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.108.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.108.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.108.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.109.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.109.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.109.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.110.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.110.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.110.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.111.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.111.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.111.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.112.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.112.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.112.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.113.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.113.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.113.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.114.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.114.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.114.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.115.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.115.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.115.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.116.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.116.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.116.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.117.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.117.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.117.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.118.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.118.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.118.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.119.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.119.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.119.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.120.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.120.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.120.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.121.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.121.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.121.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.122.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.122.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.122.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.123.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.123.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.123.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.124.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.124.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.124.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.125.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.125.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.125.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.126.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.126.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.126.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.127.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.127.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.127.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w1.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w2.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w3.weight": "model-00035-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w3.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.64.w1.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.64.w2.weight": "model-00036-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.64.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.65.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.65.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.65.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.66.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.66.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.66.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.67.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.67.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.67.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.68.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.68.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.68.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.69.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.69.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.69.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.70.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.70.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.70.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.71.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.71.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.71.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.72.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.72.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.72.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.73.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.73.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.73.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.74.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.74.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.74.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.75.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.75.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.75.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.76.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.76.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.76.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.77.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.77.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.77.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.78.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.78.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.78.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.79.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.79.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.79.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.80.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.80.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.80.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.81.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.81.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.81.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.82.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.82.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.82.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.83.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.83.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.83.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.84.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.84.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.84.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.85.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.85.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.85.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.86.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.86.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.86.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.87.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.87.w2.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.87.w3.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.88.w1.weight": "model-00037-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.88.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.88.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.89.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.89.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.89.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w1.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w2.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w3.weight": "model-00034-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.90.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.90.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.90.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.91.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.91.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.91.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.92.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.92.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.92.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.93.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.93.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.93.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.94.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.94.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.94.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.95.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.95.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.95.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.96.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.96.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.96.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.97.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.97.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.97.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.98.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.98.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.98.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.99.w1.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.99.w2.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.experts.99.w3.weight": "model-00038-of-00194.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00034-of-00194.safetensors", + "model.layers.6.input_layernorm.weight": "model-00039-of-00194.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00039-of-00194.safetensors", + "model.layers.6.residual_layernorm.weight": "model-00039-of-00194.safetensors", + "model.layers.6.residual_mlp.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.6.residual_mlp.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.6.residual_mlp.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00034-of-00194.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00034-of-00194.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00034-of-00194.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00034-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.100.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.100.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.100.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.101.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.101.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.101.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.102.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.102.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.102.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.103.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.103.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.103.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.104.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.104.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.104.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.105.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.105.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.105.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.106.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.106.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.106.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.107.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.107.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.107.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.108.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.108.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.108.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.109.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.109.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.109.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.110.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.110.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.110.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.111.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.111.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.111.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.112.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.112.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.112.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.113.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.113.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.113.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.114.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.114.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.114.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.115.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.115.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.115.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.116.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.116.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.116.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.117.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.117.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.117.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.118.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.118.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.118.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.119.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.119.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.119.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.120.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.120.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.120.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.121.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.121.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.121.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.122.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.122.w2.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.122.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.123.w1.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.123.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.123.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.124.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.124.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.124.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.125.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.125.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.125.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.126.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.126.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.126.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.127.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.127.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.127.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w2.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w3.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w1.weight": "model-00041-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.64.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.64.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.64.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.65.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.65.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.65.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.66.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.66.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.66.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.67.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.67.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.67.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.68.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.68.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.68.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.69.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.69.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.69.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.70.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.70.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.70.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.71.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.71.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.71.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.72.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.72.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.72.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.73.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.73.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.73.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.74.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.74.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.74.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.75.w1.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.75.w2.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.75.w3.weight": "model-00042-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.76.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.76.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.76.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.77.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.77.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.77.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.78.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.78.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.78.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.79.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.79.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.79.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.80.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.80.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.80.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.81.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.81.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.81.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.82.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.82.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.82.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.83.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.83.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.83.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.84.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.84.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.84.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.85.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.85.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.85.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.86.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.86.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.86.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.87.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.87.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.87.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.88.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.88.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.88.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.89.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.89.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.89.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w1.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w2.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w3.weight": "model-00040-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.90.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.90.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.90.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.91.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.91.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.91.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.92.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.92.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.92.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.93.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.93.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.93.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.94.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.94.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.94.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.95.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.95.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.95.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.96.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.96.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.96.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.97.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.97.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.97.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.98.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.98.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.98.w3.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.99.w1.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.99.w2.weight": "model-00043-of-00194.safetensors", + "model.layers.7.block_sparse_moe.experts.99.w3.weight": "model-00044-of-00194.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00039-of-00194.safetensors", + "model.layers.7.input_layernorm.weight": "model-00045-of-00194.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00045-of-00194.safetensors", + "model.layers.7.residual_layernorm.weight": "model-00045-of-00194.safetensors", + "model.layers.7.residual_mlp.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.7.residual_mlp.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.7.residual_mlp.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00039-of-00194.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00039-of-00194.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00039-of-00194.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00039-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.100.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.100.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.100.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.101.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.101.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.101.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.102.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.102.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.102.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.103.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.103.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.103.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.104.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.104.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.104.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.105.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.105.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.105.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.106.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.106.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.106.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.107.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.107.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.107.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.108.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.108.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.108.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.109.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.109.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.109.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.110.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.110.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.110.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.111.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.111.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.111.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.112.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.112.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.112.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.113.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.113.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.113.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.114.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.114.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.114.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.115.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.115.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.115.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.116.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.116.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.116.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.117.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.117.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.117.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.118.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.118.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.118.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.119.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.119.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.119.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.120.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.120.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.120.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.121.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.121.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.121.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.122.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.122.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.122.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.123.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.123.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.123.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.124.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.124.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.124.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.125.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.125.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.125.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.126.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.126.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.126.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.127.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.127.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.127.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w1.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w2.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w3.weight": "model-00046-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w3.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w1.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w2.weight": "model-00047-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.64.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.64.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.64.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.65.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.65.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.65.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.66.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.66.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.66.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.67.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.67.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.67.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.68.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.68.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.68.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.69.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.69.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.69.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.70.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.70.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.70.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.71.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.71.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.71.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.72.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.72.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.72.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.73.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.73.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.73.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.74.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.74.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.74.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.75.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.75.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.75.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.76.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.76.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.76.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.77.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.77.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.77.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.78.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.78.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.78.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.79.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.79.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.79.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.80.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.80.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.80.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.81.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.81.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.81.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.82.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.82.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.82.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.83.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.83.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.83.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.84.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.84.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.84.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.85.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.85.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.85.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.86.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.86.w2.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.86.w3.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.87.w1.weight": "model-00048-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.87.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.87.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.88.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.88.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.88.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.89.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.89.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.89.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w1.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w2.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w3.weight": "model-00045-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.90.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.90.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.90.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.91.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.91.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.91.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.92.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.92.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.92.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.93.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.93.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.93.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.94.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.94.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.94.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.95.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.95.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.95.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.96.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.96.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.96.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.97.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.97.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.97.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.98.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.98.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.98.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.99.w1.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.99.w2.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.experts.99.w3.weight": "model-00049-of-00194.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00045-of-00194.safetensors", + "model.layers.8.input_layernorm.weight": "model-00050-of-00194.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00050-of-00194.safetensors", + "model.layers.8.residual_layernorm.weight": "model-00050-of-00194.safetensors", + "model.layers.8.residual_mlp.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.8.residual_mlp.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.8.residual_mlp.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00045-of-00194.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00045-of-00194.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00045-of-00194.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00045-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.100.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.100.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.100.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.101.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.101.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.101.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.102.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.102.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.102.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.103.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.103.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.103.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.104.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.104.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.104.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.105.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.105.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.105.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.106.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.106.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.106.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.107.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.107.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.107.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.108.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.108.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.108.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.109.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.109.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.109.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.110.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.110.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.110.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.111.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.111.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.111.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.112.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.112.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.112.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.113.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.113.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.113.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.114.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.114.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.114.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.115.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.115.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.115.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.116.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.116.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.116.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.117.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.117.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.117.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.118.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.118.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.118.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.119.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.119.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.119.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.120.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.120.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.120.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.121.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.121.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.121.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.122.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.122.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.122.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.123.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.123.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.123.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.124.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.124.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.124.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.125.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.125.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.125.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.126.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.126.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.126.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.127.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.127.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.127.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w2.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w3.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w1.weight": "model-00052-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.64.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.64.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.64.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.65.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.65.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.65.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.66.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.66.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.66.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.67.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.67.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.67.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.68.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.68.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.68.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.69.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.69.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.69.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.70.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.70.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.70.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.71.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.71.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.71.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.72.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.72.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.72.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.73.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.73.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.73.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.74.w1.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.74.w2.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.74.w3.weight": "model-00053-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.75.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.75.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.75.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.76.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.76.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.76.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.77.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.77.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.77.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.78.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.78.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.78.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.79.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.79.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.79.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.80.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.80.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.80.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.81.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.81.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.81.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.82.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.82.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.82.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.83.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.83.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.83.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.84.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.84.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.84.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.85.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.85.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.85.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.86.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.86.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.86.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.87.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.87.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.87.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.88.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.88.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.88.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.89.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.89.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.89.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w1.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w2.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w3.weight": "model-00051-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.90.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.90.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.90.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.91.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.91.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.91.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.92.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.92.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.92.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.93.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.93.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.93.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.94.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.94.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.94.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.95.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.95.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.95.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.96.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.96.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.96.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.97.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.97.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.97.w3.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.98.w1.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.98.w2.weight": "model-00054-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.98.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.99.w1.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.99.w2.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.experts.99.w3.weight": "model-00055-of-00194.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00050-of-00194.safetensors", + "model.layers.9.input_layernorm.weight": "model-00056-of-00194.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00056-of-00194.safetensors", + "model.layers.9.residual_layernorm.weight": "model-00056-of-00194.safetensors", + "model.layers.9.residual_mlp.w1.weight": "model-00056-of-00194.safetensors", + "model.layers.9.residual_mlp.w2.weight": "model-00056-of-00194.safetensors", + "model.layers.9.residual_mlp.w3.weight": "model-00056-of-00194.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00050-of-00194.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00050-of-00194.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00050-of-00194.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00050-of-00194.safetensors", + "model.norm.weight": "model-00194-of-00194.safetensors" } } diff --git a/modeling_arctic.py b/modeling_arctic.py index 1eb8d1ba598d845cb3515113e1735c9f282f38c2..3ca99d9089e49032542cdae29e365cc3cc1027a4 100644 --- a/modeling_arctic.py +++ b/modeling_arctic.py @@ -56,7 +56,7 @@ from transformers.utils import ( ) from transformers.utils.import_utils import is_torch_fx_available from .configuration_arctic import ArcticConfig -from transformers.integrations.deepspeed import is_deepspeed_available +from transformers.integrations.deepspeed import is_deepspeed_available from transformers.utils.versions import require_version if is_deepspeed_available(): @@ -354,7 +354,7 @@ class ArcticAttention(nn.Module): ds_optimized_quantization_config=quantization_config, ds_optimized_base_weight_sharding=True, dtype=torch.bfloat16) - self.o_proj = get_arctic_linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=False, + self.o_proj = get_arctic_linear(self.hidden_size, self.hidden_size, bias=False, use_deepspeed_implementation=self.use_deepspeed_implementation, ds_optimized_lora_config=deepspeed_lora_config, ds_optimized_quantization_config=quantization_config, diff --git a/quant_config.json b/quant_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bb21d0fe3d33b254996641f658ca7bbecca0fb72 --- /dev/null +++ b/quant_config.json @@ -0,0 +1,4 @@ +{ + "bits": 8, + "group_size": 128 +} diff --git a/tokenizer_config.json b/tokenizer_config.json index a4c28fbee1158a818440d8b14cc6b94626724d9f..6bfd22334da0508be8a53003b0b8d58897fdf691 100644 --- a/tokenizer_config.json +++ b/tokenizer_config.json @@ -60,4 +60,4 @@ "tokenizer_class": "ArcticTokenizer", "unk_token": "", "use_default_system_prompt": false -} \ No newline at end of file +}