diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f65c613e358e34065ed2044617934f5879cdf2b6 --- /dev/null +++ b/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "mistral-community/Mixtral-8x22B-v0.1", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 6144, + "initializer_range": 0.02, + "intermediate_size": 16384, + "max_position_embeddings": 65536, + "model_type": "mixtral", + "num_attention_heads": 48, + "num_experts_per_tok": 2, + "num_hidden_layers": 56, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.40.0.dev0", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..05283793bb9bb2f7a016ed90b7bedee58adbea84 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.40.0.dev0" +} diff --git a/model-00001-of-00059.safetensors b/model-00001-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec9a3e7918ea803c5b4c36448e8790621cf037b1 --- /dev/null +++ b/model-00001-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae336f5424490c3f680566aa6873485afbcbcdf90f5342f2767ef4120c96f564 +size 4998663696 diff --git a/model-00002-of-00059.safetensors b/model-00002-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27383931a3a9560c7855a5bb62fb7ceb7614d747 --- /dev/null +++ b/model-00002-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7afc34c3de23e68af648600f548ef12a5209897523ccefff0d7cbf5f653064 +size 4806799120 diff --git a/model-00003-of-00059.safetensors b/model-00003-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d678fe7ae11d9ae08916b819834388c8dba499b6 --- /dev/null +++ b/model-00003-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60bf0929e42a115ca4d518eecbe018148971fcd29dd48f740170ded8019c5956 +size 4806799120 diff --git a/model-00004-of-00059.safetensors b/model-00004-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..188ad7be5b9ca17fb626710942c7fbb0579eb250 --- /dev/null +++ b/model-00004-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7873adab3794b579cd3f1ee333e45eff1734d22e1109ff981b7f0983c15fa0fa +size 4806799120 diff --git a/model-00005-of-00059.safetensors b/model-00005-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bdd18e780285d8150d14ee3652217bf03066c9f --- /dev/null +++ b/model-00005-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbd4e8d76f93784c6bd59f78bb0541d2b89b99cf3feee394e60c436c5a648dc7 +size 4806799120 diff --git a/model-00006-of-00059.safetensors b/model-00006-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..206eb594f1d860fb163af1ac61e1615b3e877800 --- /dev/null +++ b/model-00006-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7d6b4fa9c73523ceeefce840b8436143f4503c60ef31d7e8366be500ac93c40 +size 4806799120 diff --git a/model-00007-of-00059.safetensors b/model-00007-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19f21f4d15112e22eeead017e2e6e7ca8bef3c37 --- /dev/null +++ b/model-00007-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d50635299ae14363e605f8127aae5cd39ceb789c66a0aab6b6bc1b1815ddcfa8 +size 4806799120 diff --git a/model-00008-of-00059.safetensors b/model-00008-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19a8496ba9f5c70f5746d374400924d9d7d2565f --- /dev/null +++ b/model-00008-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696ef1d02d1df97101ffc4ac2a85de6c694502f0e129325602d57c2b09250624 +size 4806799120 diff --git a/model-00009-of-00059.safetensors b/model-00009-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c5fd119711742f61a8d748a5acad54b8333dcc7 --- /dev/null +++ b/model-00009-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e793e546381a19912a2f864f70c73c2b48da02209dafa096a16010c5edba82 +size 4806799120 diff --git a/model-00010-of-00059.safetensors b/model-00010-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41690fa8876d47428aa8646f002d89255960b271 --- /dev/null +++ b/model-00010-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07fdb6ea1e4964b86b532a7ea345c5b1a04701ec057cf7aa0a813ece62f06144 +size 4806799120 diff --git a/model-00011-of-00059.safetensors b/model-00011-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bea8386823f890585f7051570f6d6d6a0c446868 --- /dev/null +++ b/model-00011-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d758ab978b3314b1bc3319481b4b27663d4e234b168bedfca87449015557c91 +size 4806799136 diff --git a/model-00012-of-00059.safetensors b/model-00012-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b57cd07dd67584fdcfa6ac65487b5784d64fa5a2 --- /dev/null +++ b/model-00012-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7a5d3368ff33a82e4008c263948d81b557f0e6e35a85ea9ed93160661062d5 +size 4806799152 diff --git a/model-00013-of-00059.safetensors b/model-00013-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7614853f35f1e2c3a63879d6e86cd653dcd30739 --- /dev/null +++ b/model-00013-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26543aef403982dfb25dca72a05d0de385e409f11ff18b2545d779e8ca5b90c +size 4806799152 diff --git a/model-00014-of-00059.safetensors b/model-00014-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ab305d29d0c4db929c39fe31a5b6fe4433ae250 --- /dev/null +++ b/model-00014-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee3e706a1f2147510733d7ef9cf1acea042ebbeb3ea631cdb1ca3e13ced9cd4d +size 4806799152 diff --git a/model-00015-of-00059.safetensors b/model-00015-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ba7050346e238ca7618be96dc09ea1b51e55fa1 --- /dev/null +++ b/model-00015-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a3db11ca0638b9b00d9e73df311cb5ae16232a3e42a1b834c44ea2e16110641 +size 4806799152 diff --git a/model-00016-of-00059.safetensors b/model-00016-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99e749c9cb7db23b97aa3dafc550702765ac2431 --- /dev/null +++ b/model-00016-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee57ca961ea46bd99ae1fed12598d544303185f48e958d6ff27e36fe34868a1 +size 4806799152 diff --git a/model-00017-of-00059.safetensors b/model-00017-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a97ad66515e494f7acb8fad94e31f1664b7e19c4 --- /dev/null +++ b/model-00017-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:191f7105b59b87d56debed13472df645bb777a07bc5dd12a2ea65696ef2920b3 +size 4806799152 diff --git a/model-00018-of-00059.safetensors b/model-00018-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5fb5c6563a176037a6255536e5e3bdda67fbbb90 --- /dev/null +++ b/model-00018-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82b32291e4390459ef11b53d3d38e04fddd6dbf3d1d0820b26acd0442177a4d1 +size 4806799152 diff --git a/model-00019-of-00059.safetensors b/model-00019-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84fe9b9ff1d7483a2d3a3e49afdcde8de4cb96a5 --- /dev/null +++ b/model-00019-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee221e17bc0dc1212d50844a47876797f013bd9d3397f1d4e61f18c754f2abdd +size 4806799152 diff --git a/model-00020-of-00059.safetensors b/model-00020-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf58f9cc71cb3f8e0868002191b4242c1013043c --- /dev/null +++ b/model-00020-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7f34bb683f1d30308e9aebe30ec3242134e487f2f759b95af9e8c45cd37f041 +size 4806799152 diff --git a/model-00021-of-00059.safetensors b/model-00021-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c869dd2b4fbe1a41723b690609e317c164c81b23 --- /dev/null +++ b/model-00021-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ced601beb9620a970a4d85c8a0bd9f033e344213e86b05d71c10618e22c6b1a3 +size 4806799152 diff --git a/model-00022-of-00059.safetensors b/model-00022-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0e8601a007a2266114682d12f072745a6f14641 --- /dev/null +++ b/model-00022-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6052dad5df6f29631f790865cbe9a806ad90b30fd93a886a4ab07d24f60f27c +size 4806799152 diff --git a/model-00023-of-00059.safetensors b/model-00023-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5c337472b054189cb3b222297e65b05d7c6479f --- /dev/null +++ b/model-00023-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d631180ea69a67a8a246286c4240b3546c2bcc7b1a2d95d3af992c1bdd6ab0ae +size 4806799152 diff --git a/model-00024-of-00059.safetensors b/model-00024-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..591b313beb185f28d7e678edaa336dff118d57b1 --- /dev/null +++ b/model-00024-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7270baa0433794caea5252f41ceb0d8cae39c19e526c0a75982e9387c1822b2f +size 4932529864 diff --git a/model-00025-of-00059.safetensors b/model-00025-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be9f0502f75f52608fa85bdd875309d362b5def3 --- /dev/null +++ b/model-00025-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b0c2cb28fdffbdc66d1747dbd023ba6f9cdd37327e08fed1349c8db2d8e6fe +size 4995542848 diff --git a/model-00026-of-00059.safetensors b/model-00026-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0818676952fccfa542e9303bd7f3f8d1a7c096a --- /dev/null +++ b/model-00026-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d79f6b1e20791c1f5aead52f8168520a95ec95a747b7f54b5a8633c05540cd9 +size 4995542848 diff --git a/model-00027-of-00059.safetensors b/model-00027-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38780cb121a576e7da808e722817edec8d9ff7f4 --- /dev/null +++ b/model-00027-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3a7ab5efd133b092dbc4d01e8e090e1c69cdb9cadccbc260f54fb27b2c58818 +size 4932628288 diff --git a/model-00028-of-00059.safetensors b/model-00028-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6960944f382c7c58ad0c3d82c45994eaa0445b4 --- /dev/null +++ b/model-00028-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c57f8f584898ec7c9aa4ece478e70f528232e92f811a625fdff8ef958d98819 +size 4806774344 diff --git a/model-00029-of-00059.safetensors b/model-00029-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e50837637b74b181806d4f07f7df45e8c9370c81 --- /dev/null +++ b/model-00029-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2df13538a001de7e453fb547c916a39b59a0addcf6217c85a2895ffa9ddccfba +size 4806799144 diff --git a/model-00030-of-00059.safetensors b/model-00030-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdc20ef0db8b16de1ed6611d787b3d4ad88193b2 --- /dev/null +++ b/model-00030-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d0caeea5be9ccf69402d908282603b3f053ca02c01bcfc514de9aba230aeba +size 4806799144 diff --git a/model-00031-of-00059.safetensors b/model-00031-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab7530001e37409319bd3845a174532d6793e1bd --- /dev/null +++ b/model-00031-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58edcc69d63fada55173306136d15ed3b60d1cebebd2b2eabf9d67bb350582f8 +size 4806799144 diff --git a/model-00032-of-00059.safetensors b/model-00032-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..517b324636dcc4266e89b7dc8950237c264d32e7 --- /dev/null +++ b/model-00032-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a67d2547716fbd418f58296baf4f99e56310f2096b302d7c9bf05737954f074a +size 4806799144 diff --git a/model-00033-of-00059.safetensors b/model-00033-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6df5b5ec6b4290eb82cd13d4e6300bac95df6d7e --- /dev/null +++ b/model-00033-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9812b3ae7e07e262a050f157056fd3c97100016b2339cdbfba64c137f8ff0f4c +size 4806799152 diff --git a/model-00034-of-00059.safetensors b/model-00034-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21324a6d932c35ff4831b208beff07269d9eb478 --- /dev/null +++ b/model-00034-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed48826f7311e498be8b50317e0f95d608a3daa89bba1bc20ba6ad5ede993a5a +size 4806799152 diff --git a/model-00035-of-00059.safetensors b/model-00035-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24b05593c414794bd36305b19b7a187fd625e214 --- /dev/null +++ b/model-00035-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6fd3897555ec8d8ac5df4f23596d61d28dd58ba813113a5f6012245c2762d4e +size 4806799152 diff --git a/model-00036-of-00059.safetensors b/model-00036-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..041a0f03d68b9482668d7ffe2274af1d1024d0de --- /dev/null +++ b/model-00036-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:190b679a13511fcdc56ea5779b3c51433b997a748c9173064ba713afa2fab9cd +size 4806799152 diff --git a/model-00037-of-00059.safetensors b/model-00037-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..210aa3f4dd03e7da5ccc2e50367d75c2c433ae74 --- /dev/null +++ b/model-00037-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a08b2dadf091c56bd70757d67970d0cad6851932daff901e6b4ebc4045c324 +size 4806799152 diff --git a/model-00038-of-00059.safetensors b/model-00038-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67407228c0daef4bfa877e7a611f6a3a2c5f9243 --- /dev/null +++ b/model-00038-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986949881cf1e8ce2e7c9cc6f834fae7ec89a93e66fc319d18b9f389f231b881 +size 4806799152 diff --git a/model-00039-of-00059.safetensors b/model-00039-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94859e4fe6410b743d7e69fc38e3a084b013778f --- /dev/null +++ b/model-00039-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53afc1743388209171e475b7fdbea783b1f0646c1ce1d0c4d15f91760c4571e6 +size 4806799152 diff --git a/model-00040-of-00059.safetensors b/model-00040-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e87d43e8351c4b1c8741c1bc12ea3d0a9638481a --- /dev/null +++ b/model-00040-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936d449646cf6ecace4ac187004600f7bff47ace82a045815b6285158950b849 +size 4806799152 diff --git a/model-00041-of-00059.safetensors b/model-00041-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20c48a46334512a1234e0e3ef0856bc379a06f24 --- /dev/null +++ b/model-00041-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f28e0f7b99acd1eb93e316969a241bf87a83c7f764f10983c85cb470ca205d5 +size 4806799152 diff --git a/model-00042-of-00059.safetensors b/model-00042-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dadebde95d1f2b347b3bc7b08acf9e09ea328169 --- /dev/null +++ b/model-00042-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:192f4843606e133ebf3f72f1cfbf071b7cb9b099d84cae3b210c583e357ba460 +size 4806799152 diff --git a/model-00043-of-00059.safetensors b/model-00043-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f35980843005168776f851200109f7f46e60d19e --- /dev/null +++ b/model-00043-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cef086066b63a8e8054f521ae75548f88447019c10bf1ad05ad874145d75a51e +size 4806799152 diff --git a/model-00044-of-00059.safetensors b/model-00044-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfa5999ad6e46c109184722aea519326e1528dff --- /dev/null +++ b/model-00044-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c244857a3f64f67fe77c7c5e0c476b980316e22f10357b70f0ff9f6cf277da3 +size 4806799152 diff --git a/model-00045-of-00059.safetensors b/model-00045-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13ca2851873b0c751b767207f3f03b6586c3cf55 --- /dev/null +++ b/model-00045-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:189f584cc72dc72f814181a96f1536c7065f5285f54dea34a9a889504b177b24 +size 4806799152 diff --git a/model-00046-of-00059.safetensors b/model-00046-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f0d997e35bc0a236a26429959e0a106ed475e33 --- /dev/null +++ b/model-00046-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f6a32a56fa84c12ae1a11a3ca3b1e284caa77c6628fd502050613b6de425a3 +size 4806799152 diff --git a/model-00047-of-00059.safetensors b/model-00047-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..915ef10dc56dee4abe717de268de573d8b577a2f --- /dev/null +++ b/model-00047-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:274d5ada187699a5325ccf09222040b5bbaae1f827e8e68001841e8f3157e275 +size 4806799152 diff --git a/model-00048-of-00059.safetensors b/model-00048-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29425cebb0fe330e060e0415281ed18dd21f2961 --- /dev/null +++ b/model-00048-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5492fd715f4ccad106c6593a91208c53858352beceb055aa7b0fa80f2f990e1 +size 4806799152 diff --git a/model-00049-of-00059.safetensors b/model-00049-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af89a49aa8dba856b5153718b86e4c03714b5566 --- /dev/null +++ b/model-00049-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:044f475f1b998a3f1114e919c18f22f13b353f42a4e58c539a2771328040822a +size 4806799152 diff --git a/model-00050-of-00059.safetensors b/model-00050-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..252798971e8a7268475a9767f7f9a7781c6c2259 --- /dev/null +++ b/model-00050-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e90239f76720228e1be101aab56632fcc3400773b79c1106441d1a8b0f93bbca +size 4806799152 diff --git a/model-00051-of-00059.safetensors b/model-00051-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88785726695dfbf20a0df3bcecdd3a953d27df3e --- /dev/null +++ b/model-00051-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c088e08bd17588d8fc72ddfbfecd215cdbbaf16cd023e83901f13e712d2d398 +size 4806799152 diff --git a/model-00052-of-00059.safetensors b/model-00052-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f58ff4f6f8257f630bf8529631d4fad82d62e063 --- /dev/null +++ b/model-00052-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fbe3fcf9f219568d74c2e110c4c43b7ba9638aa6669bb08cd0d93d74b2459e0 +size 4932529864 diff --git a/model-00053-of-00059.safetensors b/model-00053-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a9734cc83629887cb66d37c478d7a357175ab05 --- /dev/null +++ b/model-00053-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff82747bf374cd5186e98dc6536bf4045ff10c689902d43defc9a0aeaab46754 +size 4995542848 diff --git a/model-00054-of-00059.safetensors b/model-00054-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0425ae0eb052048bbd460dc94e61e80e2424812e --- /dev/null +++ b/model-00054-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd6884a7ee73fc570af516ec55e4d575e03cca61948b2d97a0de68ca9b6c8a5 +size 4995542848 diff --git a/model-00055-of-00059.safetensors b/model-00055-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27c5f00bc7c4a3ac6457b06152c8b03913f62228 --- /dev/null +++ b/model-00055-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c638d32df5d124a3ac5112221e8a5d5e5508eb0861b0304c1367604223efde9 +size 4932628288 diff --git a/model-00056-of-00059.safetensors b/model-00056-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76351f1d7d710a4b554d270a4361fbf8ff44bc83 --- /dev/null +++ b/model-00056-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbebb6167903b01cc8c8954a12e58087114ac0cd5701e92c4c13183d88ac7b40 +size 4806774344 diff --git a/model-00057-of-00059.safetensors b/model-00057-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b59d01f753a7e493c9daadd9c24b3974dfe4393 --- /dev/null +++ b/model-00057-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ded18d6f12f0a607a0e2b8aca3460674e3673c53abef0c36979accb12c2dca0c +size 4806799144 diff --git a/model-00058-of-00059.safetensors b/model-00058-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ed611dfbf7b406c62ba61d3a8b931458d933f35 --- /dev/null +++ b/model-00058-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7530ff697af7a4ccabaa2b9f7465d67d2ff7366ee4c622c4af19c1a1f1f628ab +size 4806799144 diff --git a/model-00059-of-00059.safetensors b/model-00059-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9bb7d5e4ce8af2d03619c41b739322834cdd344 --- /dev/null +++ b/model-00059-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b5d0c4c98b7ce8ca5a81ff044d2a38fa58867127823a4a6242359b7d62ce864 +size 997233472 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..805c18819ee3c4a51164b634df4f4033c74583ad --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1746 @@ +{ + "metadata": { + "total_size": 281241268224 + }, + "weight_map": { + "lm_head.weight": "model-00059-of-00059.safetensors", + "model.embed_tokens.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00059.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00059.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00059.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00059.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00059.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00059.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00011-of-00059.safetensors", + "model.layers.10.input_layernorm.weight": "model-00012-of-00059.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00012-of-00059.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00012-of-00059.safetensors", + "model.layers.11.input_layernorm.weight": "model-00013-of-00059.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00013-of-00059.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00013-of-00059.safetensors", + "model.layers.12.input_layernorm.weight": "model-00014-of-00059.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00014-of-00059.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00014-of-00059.safetensors", + "model.layers.13.input_layernorm.weight": "model-00015-of-00059.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00015-of-00059.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00015-of-00059.safetensors", + "model.layers.14.input_layernorm.weight": "model-00016-of-00059.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00016-of-00059.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00016-of-00059.safetensors", + "model.layers.15.input_layernorm.weight": "model-00017-of-00059.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00017-of-00059.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00017-of-00059.safetensors", + "model.layers.16.input_layernorm.weight": "model-00018-of-00059.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00018-of-00059.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00018-of-00059.safetensors", + "model.layers.17.input_layernorm.weight": "model-00019-of-00059.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00019-of-00059.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00019-of-00059.safetensors", + "model.layers.18.input_layernorm.weight": "model-00020-of-00059.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00020-of-00059.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00020-of-00059.safetensors", + "model.layers.19.input_layernorm.weight": "model-00021-of-00059.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00021-of-00059.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00003-of-00059.safetensors", + "model.layers.2.input_layernorm.weight": "model-00004-of-00059.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00004-of-00059.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00021-of-00059.safetensors", + "model.layers.20.input_layernorm.weight": "model-00022-of-00059.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00022-of-00059.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00022-of-00059.safetensors", + "model.layers.21.input_layernorm.weight": "model-00023-of-00059.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00023-of-00059.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00023-of-00059.safetensors", + "model.layers.22.input_layernorm.weight": "model-00024-of-00059.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00024-of-00059.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00023-of-00059.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00023-of-00059.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00023-of-00059.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00023-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00025-of-00059.safetensors", + "model.layers.23.input_layernorm.weight": "model-00025-of-00059.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00025-of-00059.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00024-of-00059.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00025-of-00059.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00024-of-00059.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00024-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00026-of-00059.safetensors", + "model.layers.24.input_layernorm.weight": "model-00026-of-00059.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00026-of-00059.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00025-of-00059.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00026-of-00059.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00025-of-00059.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00026-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00027-of-00059.safetensors", + "model.layers.25.input_layernorm.weight": "model-00027-of-00059.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00027-of-00059.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00027-of-00059.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00027-of-00059.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00026-of-00059.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00027-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00028-of-00059.safetensors", + "model.layers.26.input_layernorm.weight": "model-00029-of-00059.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00029-of-00059.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00029-of-00059.safetensors", + "model.layers.27.input_layernorm.weight": "model-00030-of-00059.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00030-of-00059.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00030-of-00059.safetensors", + "model.layers.28.input_layernorm.weight": "model-00031-of-00059.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00031-of-00059.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00031-of-00059.safetensors", + "model.layers.29.input_layernorm.weight": "model-00032-of-00059.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00032-of-00059.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00004-of-00059.safetensors", + "model.layers.3.input_layernorm.weight": "model-00005-of-00059.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00005-of-00059.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00032-of-00059.safetensors", + "model.layers.30.input_layernorm.weight": "model-00033-of-00059.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00033-of-00059.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00033-of-00059.safetensors", + "model.layers.31.input_layernorm.weight": "model-00034-of-00059.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00034-of-00059.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00034-of-00059.safetensors", + "model.layers.32.input_layernorm.weight": "model-00035-of-00059.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00035-of-00059.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00035-of-00059.safetensors", + "model.layers.33.input_layernorm.weight": "model-00036-of-00059.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00036-of-00059.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00036-of-00059.safetensors", + "model.layers.34.input_layernorm.weight": "model-00037-of-00059.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00037-of-00059.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.gate.weight": "model-00037-of-00059.safetensors", + "model.layers.35.input_layernorm.weight": "model-00038-of-00059.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00038-of-00059.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.gate.weight": "model-00038-of-00059.safetensors", + "model.layers.36.input_layernorm.weight": "model-00039-of-00059.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00039-of-00059.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.gate.weight": "model-00039-of-00059.safetensors", + "model.layers.37.input_layernorm.weight": "model-00040-of-00059.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00040-of-00059.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.gate.weight": "model-00040-of-00059.safetensors", + "model.layers.38.input_layernorm.weight": "model-00041-of-00059.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00041-of-00059.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.gate.weight": "model-00041-of-00059.safetensors", + "model.layers.39.input_layernorm.weight": "model-00042-of-00059.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00042-of-00059.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00005-of-00059.safetensors", + "model.layers.4.input_layernorm.weight": "model-00006-of-00059.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00006-of-00059.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.gate.weight": "model-00042-of-00059.safetensors", + "model.layers.40.input_layernorm.weight": "model-00043-of-00059.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00043-of-00059.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.gate.weight": "model-00043-of-00059.safetensors", + "model.layers.41.input_layernorm.weight": "model-00044-of-00059.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00044-of-00059.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.gate.weight": "model-00044-of-00059.safetensors", + "model.layers.42.input_layernorm.weight": "model-00045-of-00059.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00045-of-00059.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.gate.weight": "model-00045-of-00059.safetensors", + "model.layers.43.input_layernorm.weight": "model-00046-of-00059.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00046-of-00059.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.gate.weight": "model-00046-of-00059.safetensors", + "model.layers.44.input_layernorm.weight": "model-00047-of-00059.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00047-of-00059.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.gate.weight": "model-00047-of-00059.safetensors", + "model.layers.45.input_layernorm.weight": "model-00048-of-00059.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00048-of-00059.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.gate.weight": "model-00048-of-00059.safetensors", + "model.layers.46.input_layernorm.weight": "model-00049-of-00059.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00049-of-00059.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.gate.weight": "model-00049-of-00059.safetensors", + "model.layers.47.input_layernorm.weight": "model-00050-of-00059.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00050-of-00059.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.gate.weight": "model-00050-of-00059.safetensors", + "model.layers.48.input_layernorm.weight": "model-00051-of-00059.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00051-of-00059.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.gate.weight": "model-00051-of-00059.safetensors", + "model.layers.49.input_layernorm.weight": "model-00052-of-00059.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00052-of-00059.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00051-of-00059.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00051-of-00059.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00051-of-00059.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00051-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00006-of-00059.safetensors", + "model.layers.5.input_layernorm.weight": "model-00007-of-00059.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00007-of-00059.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.gate.weight": "model-00053-of-00059.safetensors", + "model.layers.50.input_layernorm.weight": "model-00053-of-00059.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00053-of-00059.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00052-of-00059.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00053-of-00059.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00052-of-00059.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00052-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.gate.weight": "model-00054-of-00059.safetensors", + "model.layers.51.input_layernorm.weight": "model-00054-of-00059.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00054-of-00059.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00053-of-00059.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00054-of-00059.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00053-of-00059.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00054-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.gate.weight": "model-00055-of-00059.safetensors", + "model.layers.52.input_layernorm.weight": "model-00055-of-00059.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00055-of-00059.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00055-of-00059.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00055-of-00059.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00054-of-00059.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00055-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.53.block_sparse_moe.gate.weight": "model-00056-of-00059.safetensors", + "model.layers.53.input_layernorm.weight": "model-00057-of-00059.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00057-of-00059.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.54.block_sparse_moe.gate.weight": "model-00057-of-00059.safetensors", + "model.layers.54.input_layernorm.weight": "model-00058-of-00059.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00058-of-00059.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w1.weight": "model-00059-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w2.weight": "model-00059-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w3.weight": "model-00059-of-00059.safetensors", + "model.layers.55.block_sparse_moe.gate.weight": "model-00058-of-00059.safetensors", + "model.layers.55.input_layernorm.weight": "model-00059-of-00059.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00059-of-00059.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00007-of-00059.safetensors", + "model.layers.6.input_layernorm.weight": "model-00008-of-00059.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00008-of-00059.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00008-of-00059.safetensors", + "model.layers.7.input_layernorm.weight": "model-00009-of-00059.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00009-of-00059.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00009-of-00059.safetensors", + "model.layers.8.input_layernorm.weight": "model-00010-of-00059.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00010-of-00059.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00010-of-00059.safetensors", + "model.layers.9.input_layernorm.weight": "model-00011-of-00059.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00011-of-00059.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00010-of-00059.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00010-of-00059.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00010-of-00059.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00010-of-00059.safetensors", + "model.norm.weight": "model-00059-of-00059.safetensors" + } +}