diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0e582c0a3b363a7d1be66f0b5b197d953edbde9f --- /dev/null +++ b/config.json @@ -0,0 +1,35 @@ +{ + "_name_or_path": "./superbeyonder", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_length": 4096, + "max_position_embeddings": 2048, + "model_type": "mixtral", + "num_attention_heads": 64, + "num_experts_per_tok": 2, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "num_local_experts": 2, + "output_router_logits": false, + "pad_token_id": 0, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.36.2", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..af905f9f98e9fe364980befe17245097f6e0160c --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "max_length": 4096, + "pad_token_id": 0, + "transformers_version": "4.36.2" +} diff --git a/model-00001-of-00108.safetensors b/model-00001-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..084269acf507f06b405b5f4ca10b2c1353bb5523 --- /dev/null +++ b/model-00001-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f824bb64db79c44c6ee3779001c5dc9e6dc55b5e38d4553200855b415c99fc +size 4471194712 diff --git a/model-00002-of-00108.safetensors b/model-00002-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf2580ef4733a9a7f954164690bdf9453b6484d2 --- /dev/null +++ b/model-00002-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54d6e5f04cfdf4e6b9fbc1d8cb55f5bd236caa17d04f599ed539bbd788467645 +size 4362208592 diff --git a/model-00003-of-00108.safetensors b/model-00003-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d174439736940b54ca1d1b9101877144b04df435 --- /dev/null +++ b/model-00003-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b66e27fb226ec679a0032837be16bd7a5bdaa28797990f4228f5b814da396da +size 4999677048 diff --git a/model-00004-of-00108.safetensors b/model-00004-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c03eaa80672edef58638f8e794d361984eb3226 --- /dev/null +++ b/model-00004-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b148aa43369f9a9dbc21a541f344192a9ff408a53067f5261e28aa46e8db7f5d +size 4999676936 diff --git a/model-00005-of-00108.safetensors b/model-00005-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c5e8d74f13c4607e2ef769211b462a96a4961d32 --- /dev/null +++ b/model-00005-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36a73ec196ac36c5a4d0adc22a8acde8f6ed564f2331baf4a2b3a5c6103f399 +size 4362208592 diff --git a/model-00006-of-00108.safetensors b/model-00006-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a55af8165f2359e4eabc86bd200f8df53e2b7a84 --- /dev/null +++ b/model-00006-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b22a89a1442b632655db36cbcdf26b9837350ab889043cf2ee59bcc253f19aa +size 4362208592 diff --git a/model-00007-of-00108.safetensors b/model-00007-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91d76e3b4b450686492dfd9aaef9d6a96d89ad6d --- /dev/null +++ b/model-00007-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d63d4fffcbde7a58553b59e61e6e28fe76effe14d2bc31c2b6d3f995da3d4d1 +size 4999677048 diff --git a/model-00008-of-00108.safetensors b/model-00008-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e99fa7f4f078e9bdbfd86538ed57bf98687afa5d --- /dev/null +++ b/model-00008-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f79c9f8a6f6ac527819f2c0fff704e0612501b95bbbf4b20debd6defce94820d +size 4999676936 diff --git a/model-00009-of-00108.safetensors b/model-00009-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47b8043a8786757447eca54865552ec9860dcc55 --- /dev/null +++ b/model-00009-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b783f77a641b1128b1efc503c6b83bb27761f19c1a8b0661232218a3958fb369 +size 4362208592 diff --git a/model-00010-of-00108.safetensors b/model-00010-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37e8fda931b70e76b6dc9d9ce4f7cb674a6c58ba --- /dev/null +++ b/model-00010-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab22c10227c6cf866e62af05104148b828a1e0035d4f352523ed134afd0a4a8 +size 4362208592 diff --git a/model-00011-of-00108.safetensors b/model-00011-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af565535fe14dc3377ce7281f2ab4283a224343c --- /dev/null +++ b/model-00011-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af8dd63f7beb6b269620e1f01f392f5f867fee1b48d2dccbc28f364d5567a1b9 +size 4999677048 diff --git a/model-00012-of-00108.safetensors b/model-00012-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffa6dc6ec7d1d7d92ef112141fce8a54dda62189 --- /dev/null +++ b/model-00012-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef19ebdd4a1ca0e929c90c0bcc0ee8ffa9dce79709fa5c3f4897cca2f441908d +size 4999676936 diff --git a/model-00013-of-00108.safetensors b/model-00013-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..924a6418d48888e19dbf82870936ec4a2ed1a785 --- /dev/null +++ b/model-00013-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de47a5cc712db7ada962e97e46efae918e49ece332cdaaa67ff644f4d02c6f9 +size 4362208592 diff --git a/model-00014-of-00108.safetensors b/model-00014-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee3109e6e1236ae39dd6a1f1151bac58ca6abd8e --- /dev/null +++ b/model-00014-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:865af2acaf773da401f01de675f8aa92a563d92be966c690ae61e01ddae2e927 +size 4362208600 diff --git a/model-00015-of-00108.safetensors b/model-00015-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..371118aac81f03593172e9ba2297f2ffe05c88a3 --- /dev/null +++ b/model-00015-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfaa7902c960646dbd72f312db796c2b5e620f75d0568bbd5b2efc96d6c1d5b6 +size 4999677056 diff --git a/model-00016-of-00108.safetensors b/model-00016-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6ed4a3e68fe370ebc82a0d164499234e98ef7c0 --- /dev/null +++ b/model-00016-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:910a7de4b11d0418bf34cdf363f054b33ac33984b15062940a64d8daf069bf6f +size 4999676944 diff --git a/model-00017-of-00108.safetensors b/model-00017-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8976e8dacbfa3b6eae227153bc1f71e5ec821ba8 --- /dev/null +++ b/model-00017-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c0b4ec25d85e719117aa193d9a7ee77d82adcf15717538afb00dc8886277891 +size 4362208600 diff --git a/model-00018-of-00108.safetensors b/model-00018-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea50677d167f95fb8b99498ac45511acbb5f5e7c --- /dev/null +++ b/model-00018-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a44463d600dced8ac95ebe465238b81c69a790f3a218693e53cb4dd93e8fe9 +size 4362208608 diff --git a/model-00019-of-00108.safetensors b/model-00019-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..908a6692b2cea1b26ed1ce0510cb8fe33d671318 --- /dev/null +++ b/model-00019-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03f1914b0e639222792e12e98b0096bbe58596497a11ee4c4cacfb9a500aabb0 +size 4999677056 diff --git a/model-00020-of-00108.safetensors b/model-00020-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fba1988c65170e336fdee7bd2dead38cbeb70934 --- /dev/null +++ b/model-00020-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62adbb7e1235c2c8f2e8e60efe7448fbae5cc97dfd93520ca3a738cbaa368c36 +size 4999676944 diff --git a/model-00021-of-00108.safetensors b/model-00021-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..148c507856f0e6b611ca60f3ab7a54e51c4f224c --- /dev/null +++ b/model-00021-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aefbdca5681a2d12e2e843013d30baf432622b1034e4560c5581aaf0f89ac77d +size 4362208600 diff --git a/model-00022-of-00108.safetensors b/model-00022-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4923c7086a731881b05c8e75d55fd2573bada9ad --- /dev/null +++ b/model-00022-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b5957b1c15b08e8b9778305167d7f766aed280b889e236a55d8f71371d20bb2 +size 4362208608 diff --git a/model-00023-of-00108.safetensors b/model-00023-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42e8f18ed12a7cb9cdf53269141f17669c12790b --- /dev/null +++ b/model-00023-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aea10e16ccdf0dc1adbcd669291549231bf579abd2afc390b027d0750a364156 +size 4999677056 diff --git a/model-00024-of-00108.safetensors b/model-00024-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2827727cea51a0898d3dc754107ca7173bba935 --- /dev/null +++ b/model-00024-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3d5a24c0476050ffb7cd6248c8afed5548380d6c2ce610007fa7c821d2ed806 +size 4999676944 diff --git a/model-00025-of-00108.safetensors b/model-00025-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a7d2d02a70340133bb803a52660a4e6f44e2c40 --- /dev/null +++ b/model-00025-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b15196fde88434bba20f68b289ffdd2d23a9b9488b67881d076af60db6742c0b +size 4362208600 diff --git a/model-00026-of-00108.safetensors b/model-00026-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a698fb2e5e21d870f4967b952e1100c0a817451e --- /dev/null +++ b/model-00026-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77258c981ca81688cca2a3cfea0a2f685b43bade1d7faffaf5904840c9c57f1 +size 4362208608 diff --git a/model-00027-of-00108.safetensors b/model-00027-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8fe4f5c47c662190f4a580199f7f7f62242c94c5 --- /dev/null +++ b/model-00027-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f78387a16253e86b0a68ec27717185b7644c515a6de4f58f5c3d11aa155df80 +size 4999677056 diff --git a/model-00028-of-00108.safetensors b/model-00028-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9f242f10716c8f2080555de005a34dc72f74627 --- /dev/null +++ b/model-00028-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653ae7f35624ae37ec823b22aeac85ecbbab7ff3699dbc4ac0b166b528fb38d6 +size 4999676944 diff --git a/model-00029-of-00108.safetensors b/model-00029-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b1dab326a43fcd4fd2cb236f763d6511961309d --- /dev/null +++ b/model-00029-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a558525d6bf0fa89b5170d4b07965e5f66752e8d74de482d6845d94c0ccc59c7 +size 4362208600 diff --git a/model-00030-of-00108.safetensors b/model-00030-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10e19fa79faceee90a86036bd5b8a1cc46ec4f06 --- /dev/null +++ b/model-00030-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac005fd0c955eb607f137360fe5f35524083386375d35f44d7938c86fd3a3104 +size 4362208608 diff --git a/model-00031-of-00108.safetensors b/model-00031-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a00494a86f070003709290e07db40e83272decbe --- /dev/null +++ b/model-00031-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f0ab44c5b31ef54a2f86ea567153c03c255dc36855f1037887d58c2a7bcbb07 +size 4999677056 diff --git a/model-00032-of-00108.safetensors b/model-00032-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..603f240a721c868e684bcbfddbd55081ce7b3cd3 --- /dev/null +++ b/model-00032-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c558eefa73514903466f8a4482de7a14d63828e93f11f386b60fc6e0a9ed160 +size 4999676944 diff --git a/model-00033-of-00108.safetensors b/model-00033-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60c914f70657ed45c5ebbf06c7bbd94b469840fe --- /dev/null +++ b/model-00033-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbdbda3b6d6db73b5727cb2cee47955ec8a40c9093218e3f0b3933bc0dcbfe33 +size 4362208600 diff --git a/model-00034-of-00108.safetensors b/model-00034-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..813eb1fa49ce3a06f86894097eb9474d31e1c9bb --- /dev/null +++ b/model-00034-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c92b5e960bef70bd682229a1d27f221e040201941ecdedefb2351f08a4db486c +size 4362208608 diff --git a/model-00035-of-00108.safetensors b/model-00035-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2dc47f1b51e36445991c407b613f97af9bed512 --- /dev/null +++ b/model-00035-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:302381c7554795967f5687ee5425e7d878c41ab08460f52e34f86d8d59bca165 +size 4999677056 diff --git a/model-00036-of-00108.safetensors b/model-00036-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ba2bf678ee76b3798d6984d07b4affbfd3e6d46 --- /dev/null +++ b/model-00036-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a197c32cbc987115a2fdf7c29106394130077dfa2bdddd19bcb3d1cea855d6d +size 4999676944 diff --git a/model-00037-of-00108.safetensors b/model-00037-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1cd314c5a2502da6c7a83c21f96713499816236 --- /dev/null +++ b/model-00037-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b39f4401c9f9b85d573046f66f67c2ab408cc06aff7fe5588c9c356f89c837f6 +size 4362208600 diff --git a/model-00038-of-00108.safetensors b/model-00038-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37ed07bde0a609a6568e080fba5ae3ea54fc7944 --- /dev/null +++ b/model-00038-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36b8b69b734f5d720cb92945460aa3ea1514d798b5effbd2ae773aa9dfd5b8f8 +size 4362208608 diff --git a/model-00039-of-00108.safetensors b/model-00039-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1e7f25167e5cb9e5488fc4bd3bdb0e1c9e387c8 --- /dev/null +++ b/model-00039-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96296c65fe4397b6003209e223b0dc0272fcb4c3c22442116ff688127094da39 +size 4999677056 diff --git a/model-00040-of-00108.safetensors b/model-00040-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..551ca5012e58ff9ac5c8a441a9ece2ac9ee149bc --- /dev/null +++ b/model-00040-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d018fcc4b0c44d76cf42b81d77fb0a09da74377821324f6e66010adf878ea68 +size 4999676944 diff --git a/model-00041-of-00108.safetensors b/model-00041-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f02d05eec4adf358923acd9213bed52d570c5dff --- /dev/null +++ b/model-00041-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545d84a4527e8f94a21c0f58760140d4596f6c0d0df405429b87594f2cb52e71 +size 4362208600 diff --git a/model-00042-of-00108.safetensors b/model-00042-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..edec9a654a92ecbe04d99e06afa58a634debc5df --- /dev/null +++ b/model-00042-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25bf8408bad815366cbf9422df9f4c68d3445f3390e8942bb479851f6f8dec46 +size 4362208608 diff --git a/model-00043-of-00108.safetensors b/model-00043-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9709400c8b809db98a63d7ac5b619a8c33db780 --- /dev/null +++ b/model-00043-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d80566348bde16d627937ee7dd0d330273b8d9a1b66060302f1d00cea592175 +size 4999677056 diff --git a/model-00044-of-00108.safetensors b/model-00044-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c94bcfecdc566332c62025f70fa3f467ab46c9a --- /dev/null +++ b/model-00044-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:080584e45195a99c6343fe832712595667cdcfa9c6065c2f89cccd3d406cf22a +size 4999676944 diff --git a/model-00045-of-00108.safetensors b/model-00045-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24534e8f5e8b135aad789d565cfca94cfb9d5d73 --- /dev/null +++ b/model-00045-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:528228bab9247fdd5114b895f8fbdce5deed6737b2dc416ead16b378952d9b7d +size 4362208600 diff --git a/model-00046-of-00108.safetensors b/model-00046-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bfe5f770b5557b26907186df8d6953a71998234 --- /dev/null +++ b/model-00046-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ed8e73d6b66107b5b528cd769837043b696aa8cbf7bb3f22fe747b264dabec5 +size 4362208608 diff --git a/model-00047-of-00108.safetensors b/model-00047-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5459948fbff7fcd59f58b120e6155a5f8e8517e --- /dev/null +++ b/model-00047-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b21d23ba8abd05afb246bfe31a13e272f612619b992c6ae47c99093f65889ecf +size 4999677056 diff --git a/model-00048-of-00108.safetensors b/model-00048-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c95c08b11f11c4fef0a4d4a1c542b6a28967af5b --- /dev/null +++ b/model-00048-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e293a2e232a879938dea305c09acc53718dbdd0b73e4d3bc574220bbaaf044ef +size 4999676944 diff --git a/model-00049-of-00108.safetensors b/model-00049-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..390354037c4e12ad12b739deed70f44ae57e66f5 --- /dev/null +++ b/model-00049-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2570e63eb3ead2dfb3fccd8723ea9746d29e649d0142d8e9c247ca2ef4b2bdbb +size 4362208600 diff --git a/model-00050-of-00108.safetensors b/model-00050-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2573418e2cf73874e04b64b9208073a74bea51d --- /dev/null +++ b/model-00050-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b4df06535a343ae2a50c114c4df71535cd7386e377936afbd8932a2ae85cd4d +size 4362208608 diff --git a/model-00051-of-00108.safetensors b/model-00051-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51cf2b7b1d84f3abf4712b740ac6f464b6a307d4 --- /dev/null +++ b/model-00051-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c91c940847ccd85fa541f47c23f86e254b2550a9c1ccaff2cff190a0c8166804 +size 4999677056 diff --git a/model-00052-of-00108.safetensors b/model-00052-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd86bb1a68f8cdc56ba0732d7c27ed7570d2081e --- /dev/null +++ b/model-00052-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af890ef16b790975e10d30dc0e17cc569de5a978a4eea50d205aa96d5d77dde +size 4999676944 diff --git a/model-00053-of-00108.safetensors b/model-00053-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d301041d76c71f2d7570615acb204cd6055c499a --- /dev/null +++ b/model-00053-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fca6851d8589bfa780b1974cd087cda0520d40bc41279c4f8bf47bd3d9cb74fa +size 4362208600 diff --git a/model-00054-of-00108.safetensors b/model-00054-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1fb6f77f98e134e6ee9f9dc8d9b9949a4fcaffaf --- /dev/null +++ b/model-00054-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3145b2a2cef40a63bb8346c6c5f079361902a9b367560cde6815aaed72c83ccd +size 4362208608 diff --git a/model-00055-of-00108.safetensors b/model-00055-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1fb3b33d5ce8fb8f1b7e750e76786202850fdebe --- /dev/null +++ b/model-00055-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:129012d16480689878f5bae9c84ce11b22422a28c622e92932cf9255bcddd935 +size 4999677056 diff --git a/model-00056-of-00108.safetensors b/model-00056-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20e47a7101939c2f7c44a5591f1f90f3336ad8f5 --- /dev/null +++ b/model-00056-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:082d02f9d713046bf87e753900439d1e7adbaebae333bc8a407782edd85c1aa6 +size 4999676944 diff --git a/model-00057-of-00108.safetensors b/model-00057-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1ab1fc58fa7e83dc3b0051b9a9a2df324cc41e4 --- /dev/null +++ b/model-00057-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a73acc1fde56ae4e8c75b1f73902f64be3236e468e6134ab05b6cd43e5286a9e +size 4362208600 diff --git a/model-00058-of-00108.safetensors b/model-00058-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bed87b626bab427b5f82c5e0462985136e998145 --- /dev/null +++ b/model-00058-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a4cfcd0bc1047a5a4a931aa0c0546c8274c7c4b4a7361123a3088bcf2bb8d1b +size 4362208608 diff --git a/model-00059-of-00108.safetensors b/model-00059-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7d398013d340ab3430aa1ec95d2f761963ecd88 --- /dev/null +++ b/model-00059-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2f9e9dda6992fc4c4f65e31631352fee073f9e6a970774c363b2ee097304602 +size 4999677056 diff --git a/model-00060-of-00108.safetensors b/model-00060-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00e556e772d400628bf9eccdf4956bab98965c7a --- /dev/null +++ b/model-00060-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9dce52b6d75dfb3c937f8803816e4849ff3a696e74a84cb820826a5b7aac72d +size 4999676944 diff --git a/model-00061-of-00108.safetensors b/model-00061-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a1bba4d3952af551cb0d7d1490517f520c982d7 --- /dev/null +++ b/model-00061-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:505db3d129678300929bc40908a4188eedf3d5e7f9937f96439473d799aa438b +size 4362208600 diff --git a/model-00062-of-00108.safetensors b/model-00062-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9815a30398b320151d3051c1d339561cc7020d3 --- /dev/null +++ b/model-00062-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e152fa68664187ccc37c70d850f413424d72bf00e7e26c757a723cf2343b9279 +size 4362208608 diff --git a/model-00063-of-00108.safetensors b/model-00063-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cad8a7ecc9965a399cbb5ca3740d0cbfcf167190 --- /dev/null +++ b/model-00063-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82fab49d735ed66718278e70792c04dae354797a3161e28c5ca4242a60684048 +size 4999677056 diff --git a/model-00064-of-00108.safetensors b/model-00064-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3537ec6b32e53f844cb733250110dc0297b388a --- /dev/null +++ b/model-00064-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b882b70cd38d44e305bf6a593f8875718bca669a86057ec63df5f0d94a6a6ac +size 4999676944 diff --git a/model-00065-of-00108.safetensors b/model-00065-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..002813c9f1b54c65be3c6ef0babc0159a946b0ea --- /dev/null +++ b/model-00065-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:769417008986734a1706757bb1eecc49666409fbe142b851ab340d4766f7edda +size 4362208600 diff --git a/model-00066-of-00108.safetensors b/model-00066-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..72b7c6c509c83c162ca39183a9144bb5decd31d5 --- /dev/null +++ b/model-00066-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b9abdb40f0cece0775c6d72986490fa869ec35ec306731959e1f35d1032f058 +size 4362208608 diff --git a/model-00067-of-00108.safetensors b/model-00067-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08842f8a8e75a383e816548c48dc6a784173b99c --- /dev/null +++ b/model-00067-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b9873ecd6c919a022acb6dd1cff61a7e5b80885f21e69429ea1b9ad63e7bd6e +size 4999677056 diff --git a/model-00068-of-00108.safetensors b/model-00068-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a0dfb0e0acf87cc16ab760850a82a6d9ca39d22 --- /dev/null +++ b/model-00068-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbdba8895b1beb0e9a4f51485ecb817eba678269ee6a1f6b7b6d7018e775f5b8 +size 4999676944 diff --git a/model-00069-of-00108.safetensors b/model-00069-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25c1fa8fe3175a9b9b85cfce715e323791509f32 --- /dev/null +++ b/model-00069-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcf8b3af2b1eedab8c767daabc1531ecccf6a64c79675d8ace1c5c977eac3d54 +size 4362208600 diff --git a/model-00070-of-00108.safetensors b/model-00070-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b79a3278314c5142d2f37aa048bce65b275f095d --- /dev/null +++ b/model-00070-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87b990738777634dcb667c7fa5edb7323782398c051d9fc018dfe65e594bc90b +size 4362208608 diff --git a/model-00071-of-00108.safetensors b/model-00071-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bff61c4d7068d1b0b4f96d7e725c09f654140e84 --- /dev/null +++ b/model-00071-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1878e2064a8eda7f1f957630208cebba37a18e883c9f3eab12112b9a53f4643 +size 4999677056 diff --git a/model-00072-of-00108.safetensors b/model-00072-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2831e940599eaf48552081e2882b790ba0aa7538 --- /dev/null +++ b/model-00072-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7eb1a3d6e48a3f34f191b78ce4f54107e13862f0bd748afeb4879c2a9310ee2 +size 4999676944 diff --git a/model-00073-of-00108.safetensors b/model-00073-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b16c456f62ef10b34fc6ad60d214154d64cebcd6 --- /dev/null +++ b/model-00073-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee870fb6db9eaca7f2da535488bcfabe223b37c38846600f59d8d5a5ea60c9c2 +size 4362208600 diff --git a/model-00074-of-00108.safetensors b/model-00074-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f1656ded3e7f755d253528054d690358cc477890 --- /dev/null +++ b/model-00074-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d857355a6b9e07872de7bb7080843d62e0cac56304b0d7928fdde42f12b52ae +size 4362208608 diff --git a/model-00075-of-00108.safetensors b/model-00075-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c8e91d90995f46eb21727f81a1da151ed3ed994 --- /dev/null +++ b/model-00075-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:028a72861433254ef9046330df1542087699e9dbf9a090866b78665ac545133c +size 4999677056 diff --git a/model-00076-of-00108.safetensors b/model-00076-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c9854f27ca346deb3663faa9239f1b10d496d75 --- /dev/null +++ b/model-00076-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b757a330bd9349b48ce9aebca62dcda75188b9b63c147ef13002415fb7109c +size 4999676944 diff --git a/model-00077-of-00108.safetensors b/model-00077-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b19c4af6163a96df1ffdd58c02837fbe55d3f5d0 --- /dev/null +++ b/model-00077-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476589e38f55da72c3ff14080fa18508f94f4e7a1e6f73f4fb2fa864727adeb4 +size 4362208600 diff --git a/model-00078-of-00108.safetensors b/model-00078-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7db441f77d7c87a93626675c095a00b5c52f9f45 --- /dev/null +++ b/model-00078-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed2ae3ffc356f7c97cf992696e7e24919420fb2e1484f5e685e40957cac95345 +size 4362208608 diff --git a/model-00079-of-00108.safetensors b/model-00079-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb208fec0b9ceb2ce9139bf4b01e30543439c4ae --- /dev/null +++ b/model-00079-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ac0daa8eb20aeec2ceb08009ed104d4cb4c90ad6901e9db0803f40b2b8b5c8 +size 4999677056 diff --git a/model-00080-of-00108.safetensors b/model-00080-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27fe3fbd1bbeebeb621ba7abfc40d35ac97a97ae --- /dev/null +++ b/model-00080-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d401d026bc66b2a2bb448bb07395c3178c7ff9409c8071ea5fb000962dbfc407 +size 4999676944 diff --git a/model-00081-of-00108.safetensors b/model-00081-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e715f4461c8269c98038b2488ace9b9eca71bf2 --- /dev/null +++ b/model-00081-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09fb3f8b707e9cad60ed797967ba857371e325b10a7dded183ce0f50308f346b +size 4362208600 diff --git a/model-00082-of-00108.safetensors b/model-00082-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48dd5365adb4aad614011a3988b7a44249f0476a --- /dev/null +++ b/model-00082-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb87cb6b7d506cd14de1300f6c1f363aef9abda47da26313b8bb20e468c3345 +size 4362208608 diff --git a/model-00083-of-00108.safetensors b/model-00083-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9693a17343f552266401d76aeb9553d7801f5297 --- /dev/null +++ b/model-00083-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:499897a887a23dad568213d1f61b5cdb3879b7e08393292cb024e4622e65a065 +size 4999677056 diff --git a/model-00084-of-00108.safetensors b/model-00084-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75f6ca1df25c9f57115eb2a2ad915bcc8bf59557 --- /dev/null +++ b/model-00084-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25793213b8838d9540bbde3fe1c1aec06face968f0c2a8261d343c9928c8608a +size 4999676944 diff --git a/model-00085-of-00108.safetensors b/model-00085-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14cc4c56f2f5d26cc76eae242eaf0735ea107291 --- /dev/null +++ b/model-00085-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606f40960ca6fafe200a1d685019e12818c22a59bbd049097c15465a62d69367 +size 4362208600 diff --git a/model-00086-of-00108.safetensors b/model-00086-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..613c0d511d8b955402401f8adb64a4489bda6aff --- /dev/null +++ b/model-00086-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e966267328f8d705661ca06f3a5cd762e6d85b9be03e44a2b37e0eddc72b040 +size 4362208608 diff --git a/model-00087-of-00108.safetensors b/model-00087-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c34da4e4d54dbd2f38717212e8bf2a4f1a07473 --- /dev/null +++ b/model-00087-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba322b073be57ca0cc602c4684aa300bbc140fefd68bc045098eba311d7c03ea +size 4999677056 diff --git a/model-00088-of-00108.safetensors b/model-00088-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd16a5bee58285abce04be647af08eb568828a17 --- /dev/null +++ b/model-00088-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:592c316feb0c60afadf33c4c1ad22ee807b426958a3d51f8c75de08bd4309a8a +size 4999676944 diff --git a/model-00089-of-00108.safetensors b/model-00089-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c654ebca36e6336783bcba0c3833d53ad779c90 --- /dev/null +++ b/model-00089-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce8c5f20521dab4386aa44949860888b392a92c179b1f7fefa41797a75731dbd +size 4362208600 diff --git a/model-00090-of-00108.safetensors b/model-00090-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbce62b2bd43a04d89672f396cd9f09e613a0501 --- /dev/null +++ b/model-00090-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08b03b1e75fe9811a1c2e8a517099156ac57279d6bca39863e08bf417152b800 +size 4362208608 diff --git a/model-00091-of-00108.safetensors b/model-00091-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab2adec8d17e7a52db04351ee585763035dafff2 --- /dev/null +++ b/model-00091-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373804a67f901dfe53b3f7dc8b2b3890eab48f37a5f14aa4730d6db4646ec2d2 +size 4999677056 diff --git a/model-00092-of-00108.safetensors b/model-00092-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c33716ee840c4cf74e89e1d97235b89f2a3b0c98 --- /dev/null +++ b/model-00092-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08875358027ed419cdf3d4258779f2b603f2e51ace2c64391c07316cfee8759b +size 4999676944 diff --git a/model-00093-of-00108.safetensors b/model-00093-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f57ee47fbe76fe7cc8ff1d0a4a09e184319e32f8 --- /dev/null +++ b/model-00093-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b009a9213453a08bea12437d2efc377260cbda415f1c0d8320f9244d94d063c3 +size 4362208600 diff --git a/model-00094-of-00108.safetensors b/model-00094-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3232c0554884f1be5914a718a61cfc2ca8b5c161 --- /dev/null +++ b/model-00094-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6840f33897e7086c1f8c40f7470523dcd56685b331e9fd446652d5cc56b383a5 +size 4362208608 diff --git a/model-00095-of-00108.safetensors b/model-00095-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc905c7aeb622cb764d9016d0e07b5b12d3245cf --- /dev/null +++ b/model-00095-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b2a80195c779408326f6cdf2b386226753c9b905e14a9942dd60eef6319b7e2 +size 4999677056 diff --git a/model-00096-of-00108.safetensors b/model-00096-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..46637c14669cfe74ae07107a50467d13c8e531f9 --- /dev/null +++ b/model-00096-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d5c5b41af99629764e0d883f3a1c3249ee3df0f1a014d9482b528c53c32b500 +size 4999676944 diff --git a/model-00097-of-00108.safetensors b/model-00097-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e65f9394d105e9ead5d3af24fc7c21d630f1fbb1 --- /dev/null +++ b/model-00097-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e479d2ff0d40ef9f4393079a126bbefcd12436a1b9f7042b53b3afdab97a2f9 +size 4362208600 diff --git a/model-00098-of-00108.safetensors b/model-00098-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36bf20c425db4d9d75cd6d9b2810971e9771d0be --- /dev/null +++ b/model-00098-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38eee91ce173aa60eb2e38141ccd1e7ffe95dfa3fe54abf2bd557007b93be159 +size 4362208608 diff --git a/model-00099-of-00108.safetensors b/model-00099-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd9543588a73a5ce6a5eeae834baa6cedc17808c --- /dev/null +++ b/model-00099-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f90586c4310c0d52496a94d15ca1ab00b5960bee99b5f11d839621b1d5a59b87 +size 4999677056 diff --git a/model-00100-of-00108.safetensors b/model-00100-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80630cd45b054c4846a179d63017fa2eb7127a8e --- /dev/null +++ b/model-00100-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a0acb4c38a6faa3b59c31aa960b7c2111ceb7224abc022c28c619786993acd +size 4999676944 diff --git a/model-00101-of-00108.safetensors b/model-00101-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9332511f11ee499a14181b7882ea7800607d1932 --- /dev/null +++ b/model-00101-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9522bee6d51732d33dcf10de024360e680351832ae4a5fc9f88a6d28468dc0ef +size 4362208600 diff --git a/model-00102-of-00108.safetensors b/model-00102-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec93a411e108fb7b1ba137205d60256f63766c72 --- /dev/null +++ b/model-00102-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2701dd69a04f0de432c07737d5ef15518ebc3cac5134ad8e80f550784ee01c27 +size 4362208608 diff --git a/model-00103-of-00108.safetensors b/model-00103-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..727c97d03f1558360ffe47b5d812aee5ddd2185b --- /dev/null +++ b/model-00103-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a7df8f17e2834304feac0e7f78b4a79f45d86e470c3e1cda5f15fe2a8ebec69 +size 4999677056 diff --git a/model-00104-of-00108.safetensors b/model-00104-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad78324d0fa7d2bbb5f314aa65a174752f62e94b --- /dev/null +++ b/model-00104-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd59857559f3bb3b1f40587d39e2de7db6c7efa28258f424800675baa4d121be +size 4999676944 diff --git a/model-00105-of-00108.safetensors b/model-00105-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..281072951518d987208891a66ebccd15fb9f7b0c --- /dev/null +++ b/model-00105-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61990203ef1ee3c1077bbdb1741dfa2763b9c64bf1414bb0ac13bd2c61b8bda6 +size 4362208600 diff --git a/model-00106-of-00108.safetensors b/model-00106-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26111c019ee71b55fb970e9381dffa3e33234a38 --- /dev/null +++ b/model-00106-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b00276e8ae261cc0a1ae47048f35c3480a42813a145a68ed223ad0d4eade63ae +size 4362208608 diff --git a/model-00107-of-00108.safetensors b/model-00107-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad2e61db60deb6ea64d9845b5ced6ef99654ad12 --- /dev/null +++ b/model-00107-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b09c0b12f810f8639b21c7be701ae2f9dfb0e86a00873b7c3ebb612e9f78d8b +size 4697719792 diff --git a/model-00108-of-00108.safetensors b/model-00108-of-00108.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d13cfc5a4685c076470e39c9da537e2d64273c6e --- /dev/null +++ b/model-00108-of-00108.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c673f6756d99cf39aa315ba345c74b378ed7eb931fbc880e718be2fe1d0ccbd9 +size 1048576128 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..0d9555946b12b3d04c174a26f42e1133a0138901 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1050 @@ +{ + "metadata": { + "total_size": 501397618688 + }, + "weight_map": { + "lm_head.weight": "model-00108-of-00108.safetensors", + "model.embed_tokens.weight": "model-00001-of-00108.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00108.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00108.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00108.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00108.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00108.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00108.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00108.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00108.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00108.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00108.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00108.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00108.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00108.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00108.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00108.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00108.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00108.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00108.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00108.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00108.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00108.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00108.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00108.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00108.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00108.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00108.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00108.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00108.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00015-of-00108.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00015-of-00108.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00015-of-00108.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00108.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00014-of-00108.safetensors", + "model.layers.10.input_layernorm.weight": "model-00015-of-00108.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00015-of-00108.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00014-of-00108.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00014-of-00108.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00014-of-00108.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00014-of-00108.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00016-of-00108.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00108.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00108.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00108.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00108.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00108.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00016-of-00108.safetensors", + "model.layers.11.input_layernorm.weight": "model-00017-of-00108.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00017-of-00108.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00015-of-00108.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00016-of-00108.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00015-of-00108.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00016-of-00108.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00108.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00108.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00108.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00108.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00108.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00108.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00017-of-00108.safetensors", + "model.layers.12.input_layernorm.weight": "model-00018-of-00108.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00018-of-00108.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00017-of-00108.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00017-of-00108.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00017-of-00108.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00017-of-00108.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00108.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00108.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00108.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00108.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00108.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00108.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00018-of-00108.safetensors", + "model.layers.13.input_layernorm.weight": "model-00019-of-00108.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00019-of-00108.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00018-of-00108.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00018-of-00108.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00018-of-00108.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00018-of-00108.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00108.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00108.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00108.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00108.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00108.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00108.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00020-of-00108.safetensors", + "model.layers.14.input_layernorm.weight": "model-00021-of-00108.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00021-of-00108.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00019-of-00108.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00020-of-00108.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00019-of-00108.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00020-of-00108.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00108.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00108.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00108.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00108.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00108.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00108.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00021-of-00108.safetensors", + "model.layers.15.input_layernorm.weight": "model-00022-of-00108.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00022-of-00108.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00021-of-00108.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00021-of-00108.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00021-of-00108.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00021-of-00108.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00108.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00108.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00108.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00108.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00108.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00108.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00022-of-00108.safetensors", + "model.layers.16.input_layernorm.weight": "model-00023-of-00108.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00023-of-00108.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00022-of-00108.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00022-of-00108.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00022-of-00108.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00022-of-00108.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00108.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00108.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00108.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00108.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00108.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00108.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00024-of-00108.safetensors", + "model.layers.17.input_layernorm.weight": "model-00025-of-00108.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00025-of-00108.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00023-of-00108.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00024-of-00108.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00023-of-00108.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00024-of-00108.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00108.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00108.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00108.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00108.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00108.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00108.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00025-of-00108.safetensors", + "model.layers.18.input_layernorm.weight": "model-00026-of-00108.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00026-of-00108.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00025-of-00108.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00025-of-00108.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00025-of-00108.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00025-of-00108.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00108.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00108.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00108.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00108.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00108.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00108.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00026-of-00108.safetensors", + "model.layers.19.input_layernorm.weight": "model-00027-of-00108.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00027-of-00108.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00026-of-00108.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00026-of-00108.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00026-of-00108.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00026-of-00108.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00108.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00108.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00108.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00108.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00108.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00108.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00004-of-00108.safetensors", + "model.layers.2.input_layernorm.weight": "model-00005-of-00108.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00005-of-00108.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00108.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00004-of-00108.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00108.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00004-of-00108.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00108.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00108.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00108.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00108.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00108.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00108.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00028-of-00108.safetensors", + "model.layers.20.input_layernorm.weight": "model-00029-of-00108.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00029-of-00108.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00027-of-00108.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00028-of-00108.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00027-of-00108.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00028-of-00108.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00108.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00108.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00108.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00108.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00108.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00108.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00029-of-00108.safetensors", + "model.layers.21.input_layernorm.weight": "model-00030-of-00108.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00030-of-00108.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00029-of-00108.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00029-of-00108.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00029-of-00108.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00029-of-00108.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00108.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00031-of-00108.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00031-of-00108.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00031-of-00108.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00031-of-00108.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00031-of-00108.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00030-of-00108.safetensors", + "model.layers.22.input_layernorm.weight": "model-00031-of-00108.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00031-of-00108.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00030-of-00108.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00030-of-00108.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00030-of-00108.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00030-of-00108.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00108.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00108.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00108.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00108.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00108.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00108.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00032-of-00108.safetensors", + "model.layers.23.input_layernorm.weight": "model-00033-of-00108.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00033-of-00108.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00031-of-00108.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00032-of-00108.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00031-of-00108.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00032-of-00108.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00108.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00108.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00108.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00108.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00108.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00108.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00033-of-00108.safetensors", + "model.layers.24.input_layernorm.weight": "model-00034-of-00108.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00034-of-00108.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00033-of-00108.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00033-of-00108.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00033-of-00108.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00033-of-00108.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00108.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00108.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00108.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00108.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00108.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00108.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00034-of-00108.safetensors", + "model.layers.25.input_layernorm.weight": "model-00035-of-00108.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00035-of-00108.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00034-of-00108.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00034-of-00108.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00034-of-00108.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00034-of-00108.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00108.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00108.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00108.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00108.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00108.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00108.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00036-of-00108.safetensors", + "model.layers.26.input_layernorm.weight": "model-00037-of-00108.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00037-of-00108.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00035-of-00108.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00036-of-00108.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00035-of-00108.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00036-of-00108.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00108.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00037-of-00108.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00037-of-00108.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00108.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00108.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00108.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00037-of-00108.safetensors", + "model.layers.27.input_layernorm.weight": "model-00038-of-00108.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00038-of-00108.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00037-of-00108.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00037-of-00108.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00037-of-00108.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00037-of-00108.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00108.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00108.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00108.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00108.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00108.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00108.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00038-of-00108.safetensors", + "model.layers.28.input_layernorm.weight": "model-00039-of-00108.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00039-of-00108.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00038-of-00108.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00038-of-00108.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00038-of-00108.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00038-of-00108.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00108.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00108.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00040-of-00108.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00108.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00108.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00108.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00040-of-00108.safetensors", + "model.layers.29.input_layernorm.weight": "model-00041-of-00108.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00041-of-00108.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00039-of-00108.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00040-of-00108.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00039-of-00108.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00040-of-00108.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00108.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00108.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00108.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00108.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00108.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00108.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00005-of-00108.safetensors", + "model.layers.3.input_layernorm.weight": "model-00006-of-00108.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00006-of-00108.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00005-of-00108.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00005-of-00108.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00005-of-00108.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00005-of-00108.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00108.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00108.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00108.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00108.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00108.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00108.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00041-of-00108.safetensors", + "model.layers.30.input_layernorm.weight": "model-00042-of-00108.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00042-of-00108.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00041-of-00108.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00041-of-00108.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00041-of-00108.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00041-of-00108.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00108.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00108.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00108.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00043-of-00108.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00043-of-00108.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00108.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00042-of-00108.safetensors", + "model.layers.31.input_layernorm.weight": "model-00043-of-00108.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00043-of-00108.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00042-of-00108.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00042-of-00108.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00042-of-00108.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00042-of-00108.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00108.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00108.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00108.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00108.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00108.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00108.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00044-of-00108.safetensors", + "model.layers.32.input_layernorm.weight": "model-00045-of-00108.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00045-of-00108.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00043-of-00108.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00044-of-00108.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00043-of-00108.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00044-of-00108.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00108.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00108.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00108.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00108.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00046-of-00108.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00046-of-00108.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00045-of-00108.safetensors", + "model.layers.33.input_layernorm.weight": "model-00046-of-00108.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00046-of-00108.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00045-of-00108.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00045-of-00108.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00045-of-00108.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00045-of-00108.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00108.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00108.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00108.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00108.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00108.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00108.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00046-of-00108.safetensors", + "model.layers.34.input_layernorm.weight": "model-00047-of-00108.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00047-of-00108.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00046-of-00108.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00046-of-00108.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00046-of-00108.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00046-of-00108.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00048-of-00108.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00048-of-00108.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00048-of-00108.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00048-of-00108.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00048-of-00108.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00049-of-00108.safetensors", + "model.layers.35.block_sparse_moe.gate.weight": "model-00048-of-00108.safetensors", + "model.layers.35.input_layernorm.weight": "model-00049-of-00108.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00049-of-00108.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00047-of-00108.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00048-of-00108.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00047-of-00108.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00048-of-00108.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00108.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00108.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00049-of-00108.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00108.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00108.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00108.safetensors", + "model.layers.36.block_sparse_moe.gate.weight": "model-00049-of-00108.safetensors", + "model.layers.36.input_layernorm.weight": "model-00050-of-00108.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00050-of-00108.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00049-of-00108.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00049-of-00108.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00049-of-00108.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00049-of-00108.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00108.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00108.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00108.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00108.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00108.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00108.safetensors", + "model.layers.37.block_sparse_moe.gate.weight": "model-00050-of-00108.safetensors", + "model.layers.37.input_layernorm.weight": "model-00051-of-00108.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00051-of-00108.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00050-of-00108.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00050-of-00108.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00050-of-00108.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00050-of-00108.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00052-of-00108.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00052-of-00108.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00052-of-00108.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00052-of-00108.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00052-of-00108.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00108.safetensors", + "model.layers.38.block_sparse_moe.gate.weight": "model-00052-of-00108.safetensors", + "model.layers.38.input_layernorm.weight": "model-00053-of-00108.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00053-of-00108.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00051-of-00108.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00052-of-00108.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00051-of-00108.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00052-of-00108.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00108.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00108.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00108.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00054-of-00108.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00054-of-00108.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00054-of-00108.safetensors", + "model.layers.39.block_sparse_moe.gate.weight": "model-00053-of-00108.safetensors", + "model.layers.39.input_layernorm.weight": "model-00054-of-00108.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00054-of-00108.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00053-of-00108.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00053-of-00108.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00053-of-00108.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00053-of-00108.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00108.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00108.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00108.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00108.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00108.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00108.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00006-of-00108.safetensors", + "model.layers.4.input_layernorm.weight": "model-00007-of-00108.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00007-of-00108.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00006-of-00108.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00006-of-00108.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00006-of-00108.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00006-of-00108.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00054-of-00108.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00055-of-00108.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00055-of-00108.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00055-of-00108.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00055-of-00108.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00055-of-00108.safetensors", + "model.layers.40.block_sparse_moe.gate.weight": "model-00054-of-00108.safetensors", + "model.layers.40.input_layernorm.weight": "model-00055-of-00108.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00055-of-00108.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00054-of-00108.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00054-of-00108.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00054-of-00108.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00054-of-00108.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00108.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00108.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00108.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00108.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00108.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00057-of-00108.safetensors", + "model.layers.41.block_sparse_moe.gate.weight": "model-00056-of-00108.safetensors", + "model.layers.41.input_layernorm.weight": "model-00057-of-00108.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00057-of-00108.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00055-of-00108.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00056-of-00108.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00055-of-00108.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00056-of-00108.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00057-of-00108.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00057-of-00108.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00057-of-00108.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00058-of-00108.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00058-of-00108.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00058-of-00108.safetensors", + "model.layers.42.block_sparse_moe.gate.weight": "model-00057-of-00108.safetensors", + "model.layers.42.input_layernorm.weight": "model-00058-of-00108.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00058-of-00108.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00057-of-00108.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00057-of-00108.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00057-of-00108.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00057-of-00108.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00058-of-00108.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00059-of-00108.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00059-of-00108.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00059-of-00108.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00059-of-00108.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00059-of-00108.safetensors", + "model.layers.43.block_sparse_moe.gate.weight": "model-00058-of-00108.safetensors", + "model.layers.43.input_layernorm.weight": "model-00059-of-00108.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00059-of-00108.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00058-of-00108.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00058-of-00108.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00058-of-00108.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00058-of-00108.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00060-of-00108.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00060-of-00108.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00060-of-00108.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00060-of-00108.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00060-of-00108.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00061-of-00108.safetensors", + "model.layers.44.block_sparse_moe.gate.weight": "model-00060-of-00108.safetensors", + "model.layers.44.input_layernorm.weight": "model-00061-of-00108.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00061-of-00108.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00059-of-00108.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00060-of-00108.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00059-of-00108.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00060-of-00108.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00061-of-00108.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00061-of-00108.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00061-of-00108.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00062-of-00108.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00062-of-00108.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00062-of-00108.safetensors", + "model.layers.45.block_sparse_moe.gate.weight": "model-00061-of-00108.safetensors", + "model.layers.45.input_layernorm.weight": "model-00062-of-00108.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00062-of-00108.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00061-of-00108.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00061-of-00108.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00061-of-00108.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00061-of-00108.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00062-of-00108.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00063-of-00108.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00063-of-00108.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00063-of-00108.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00063-of-00108.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00063-of-00108.safetensors", + "model.layers.46.block_sparse_moe.gate.weight": "model-00062-of-00108.safetensors", + "model.layers.46.input_layernorm.weight": "model-00063-of-00108.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00063-of-00108.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00062-of-00108.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00062-of-00108.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00062-of-00108.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00062-of-00108.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00064-of-00108.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00064-of-00108.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00064-of-00108.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00064-of-00108.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00064-of-00108.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00065-of-00108.safetensors", + "model.layers.47.block_sparse_moe.gate.weight": "model-00064-of-00108.safetensors", + "model.layers.47.input_layernorm.weight": "model-00065-of-00108.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00065-of-00108.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00063-of-00108.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00064-of-00108.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00063-of-00108.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00064-of-00108.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00065-of-00108.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00065-of-00108.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00065-of-00108.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00066-of-00108.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00066-of-00108.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00066-of-00108.safetensors", + "model.layers.48.block_sparse_moe.gate.weight": "model-00065-of-00108.safetensors", + "model.layers.48.input_layernorm.weight": "model-00066-of-00108.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00066-of-00108.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00065-of-00108.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00065-of-00108.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00065-of-00108.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00065-of-00108.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00066-of-00108.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00067-of-00108.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00067-of-00108.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00067-of-00108.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00067-of-00108.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00067-of-00108.safetensors", + "model.layers.49.block_sparse_moe.gate.weight": "model-00066-of-00108.safetensors", + "model.layers.49.input_layernorm.weight": "model-00067-of-00108.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00067-of-00108.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00066-of-00108.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00066-of-00108.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00066-of-00108.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00066-of-00108.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00108.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00108.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00108.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00108.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00108.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00108.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00008-of-00108.safetensors", + "model.layers.5.input_layernorm.weight": "model-00009-of-00108.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00009-of-00108.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00007-of-00108.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00008-of-00108.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00007-of-00108.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00008-of-00108.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00068-of-00108.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00068-of-00108.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00068-of-00108.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00068-of-00108.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00068-of-00108.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00069-of-00108.safetensors", + "model.layers.50.block_sparse_moe.gate.weight": "model-00068-of-00108.safetensors", + "model.layers.50.input_layernorm.weight": "model-00069-of-00108.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00069-of-00108.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00067-of-00108.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00068-of-00108.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00067-of-00108.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00068-of-00108.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00069-of-00108.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00069-of-00108.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00069-of-00108.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00070-of-00108.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00070-of-00108.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00070-of-00108.safetensors", + "model.layers.51.block_sparse_moe.gate.weight": "model-00069-of-00108.safetensors", + "model.layers.51.input_layernorm.weight": "model-00070-of-00108.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00070-of-00108.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00069-of-00108.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00069-of-00108.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00069-of-00108.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00069-of-00108.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w1.weight": "model-00070-of-00108.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w2.weight": "model-00071-of-00108.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w3.weight": "model-00071-of-00108.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w1.weight": "model-00071-of-00108.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w2.weight": "model-00071-of-00108.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w3.weight": "model-00071-of-00108.safetensors", + "model.layers.52.block_sparse_moe.gate.weight": "model-00070-of-00108.safetensors", + "model.layers.52.input_layernorm.weight": "model-00071-of-00108.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00071-of-00108.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00070-of-00108.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00070-of-00108.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00070-of-00108.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00070-of-00108.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w1.weight": "model-00072-of-00108.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w2.weight": "model-00072-of-00108.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w3.weight": "model-00072-of-00108.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w1.weight": "model-00072-of-00108.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w2.weight": "model-00072-of-00108.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w3.weight": "model-00073-of-00108.safetensors", + "model.layers.53.block_sparse_moe.gate.weight": "model-00072-of-00108.safetensors", + "model.layers.53.input_layernorm.weight": "model-00073-of-00108.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00073-of-00108.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00071-of-00108.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00072-of-00108.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00071-of-00108.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00072-of-00108.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w1.weight": "model-00073-of-00108.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w2.weight": "model-00073-of-00108.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w3.weight": "model-00073-of-00108.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w1.weight": "model-00074-of-00108.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w2.weight": "model-00074-of-00108.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w3.weight": "model-00074-of-00108.safetensors", + "model.layers.54.block_sparse_moe.gate.weight": "model-00073-of-00108.safetensors", + "model.layers.54.input_layernorm.weight": "model-00074-of-00108.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00074-of-00108.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00073-of-00108.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00073-of-00108.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00073-of-00108.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00073-of-00108.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w1.weight": "model-00074-of-00108.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w2.weight": "model-00075-of-00108.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w3.weight": "model-00075-of-00108.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w1.weight": "model-00075-of-00108.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w2.weight": "model-00075-of-00108.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w3.weight": "model-00075-of-00108.safetensors", + "model.layers.55.block_sparse_moe.gate.weight": "model-00074-of-00108.safetensors", + "model.layers.55.input_layernorm.weight": "model-00075-of-00108.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00075-of-00108.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00074-of-00108.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00074-of-00108.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00074-of-00108.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00074-of-00108.safetensors", + "model.layers.56.block_sparse_moe.experts.0.w1.weight": "model-00076-of-00108.safetensors", + "model.layers.56.block_sparse_moe.experts.0.w2.weight": "model-00076-of-00108.safetensors", + "model.layers.56.block_sparse_moe.experts.0.w3.weight": "model-00076-of-00108.safetensors", + "model.layers.56.block_sparse_moe.experts.1.w1.weight": "model-00076-of-00108.safetensors", + "model.layers.56.block_sparse_moe.experts.1.w2.weight": "model-00076-of-00108.safetensors", + "model.layers.56.block_sparse_moe.experts.1.w3.weight": "model-00077-of-00108.safetensors", + "model.layers.56.block_sparse_moe.gate.weight": "model-00076-of-00108.safetensors", + "model.layers.56.input_layernorm.weight": "model-00077-of-00108.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00077-of-00108.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00075-of-00108.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00076-of-00108.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00075-of-00108.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00076-of-00108.safetensors", + "model.layers.57.block_sparse_moe.experts.0.w1.weight": "model-00077-of-00108.safetensors", + "model.layers.57.block_sparse_moe.experts.0.w2.weight": "model-00077-of-00108.safetensors", + "model.layers.57.block_sparse_moe.experts.0.w3.weight": "model-00077-of-00108.safetensors", + "model.layers.57.block_sparse_moe.experts.1.w1.weight": "model-00078-of-00108.safetensors", + "model.layers.57.block_sparse_moe.experts.1.w2.weight": "model-00078-of-00108.safetensors", + "model.layers.57.block_sparse_moe.experts.1.w3.weight": "model-00078-of-00108.safetensors", + "model.layers.57.block_sparse_moe.gate.weight": "model-00077-of-00108.safetensors", + "model.layers.57.input_layernorm.weight": "model-00078-of-00108.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00078-of-00108.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00077-of-00108.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00077-of-00108.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00077-of-00108.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00077-of-00108.safetensors", + "model.layers.58.block_sparse_moe.experts.0.w1.weight": "model-00078-of-00108.safetensors", + "model.layers.58.block_sparse_moe.experts.0.w2.weight": "model-00079-of-00108.safetensors", + "model.layers.58.block_sparse_moe.experts.0.w3.weight": "model-00079-of-00108.safetensors", + "model.layers.58.block_sparse_moe.experts.1.w1.weight": "model-00079-of-00108.safetensors", + "model.layers.58.block_sparse_moe.experts.1.w2.weight": "model-00079-of-00108.safetensors", + "model.layers.58.block_sparse_moe.experts.1.w3.weight": "model-00079-of-00108.safetensors", + "model.layers.58.block_sparse_moe.gate.weight": "model-00078-of-00108.safetensors", + "model.layers.58.input_layernorm.weight": "model-00079-of-00108.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00079-of-00108.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00078-of-00108.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00078-of-00108.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00078-of-00108.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00078-of-00108.safetensors", + "model.layers.59.block_sparse_moe.experts.0.w1.weight": "model-00080-of-00108.safetensors", + "model.layers.59.block_sparse_moe.experts.0.w2.weight": "model-00080-of-00108.safetensors", + "model.layers.59.block_sparse_moe.experts.0.w3.weight": "model-00080-of-00108.safetensors", + "model.layers.59.block_sparse_moe.experts.1.w1.weight": "model-00080-of-00108.safetensors", + "model.layers.59.block_sparse_moe.experts.1.w2.weight": "model-00080-of-00108.safetensors", + "model.layers.59.block_sparse_moe.experts.1.w3.weight": "model-00081-of-00108.safetensors", + "model.layers.59.block_sparse_moe.gate.weight": "model-00080-of-00108.safetensors", + "model.layers.59.input_layernorm.weight": "model-00081-of-00108.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00081-of-00108.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00079-of-00108.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00080-of-00108.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00079-of-00108.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00080-of-00108.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00108.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00108.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00108.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00108.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00108.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00108.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00009-of-00108.safetensors", + "model.layers.6.input_layernorm.weight": "model-00010-of-00108.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00010-of-00108.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00009-of-00108.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00009-of-00108.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00009-of-00108.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00009-of-00108.safetensors", + "model.layers.60.block_sparse_moe.experts.0.w1.weight": "model-00081-of-00108.safetensors", + "model.layers.60.block_sparse_moe.experts.0.w2.weight": "model-00081-of-00108.safetensors", + "model.layers.60.block_sparse_moe.experts.0.w3.weight": "model-00081-of-00108.safetensors", + "model.layers.60.block_sparse_moe.experts.1.w1.weight": "model-00082-of-00108.safetensors", + "model.layers.60.block_sparse_moe.experts.1.w2.weight": "model-00082-of-00108.safetensors", + "model.layers.60.block_sparse_moe.experts.1.w3.weight": "model-00082-of-00108.safetensors", + "model.layers.60.block_sparse_moe.gate.weight": "model-00081-of-00108.safetensors", + "model.layers.60.input_layernorm.weight": "model-00082-of-00108.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00082-of-00108.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00081-of-00108.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00081-of-00108.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00081-of-00108.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00081-of-00108.safetensors", + "model.layers.61.block_sparse_moe.experts.0.w1.weight": "model-00082-of-00108.safetensors", + "model.layers.61.block_sparse_moe.experts.0.w2.weight": "model-00083-of-00108.safetensors", + "model.layers.61.block_sparse_moe.experts.0.w3.weight": "model-00083-of-00108.safetensors", + "model.layers.61.block_sparse_moe.experts.1.w1.weight": "model-00083-of-00108.safetensors", + "model.layers.61.block_sparse_moe.experts.1.w2.weight": "model-00083-of-00108.safetensors", + "model.layers.61.block_sparse_moe.experts.1.w3.weight": "model-00083-of-00108.safetensors", + "model.layers.61.block_sparse_moe.gate.weight": "model-00082-of-00108.safetensors", + "model.layers.61.input_layernorm.weight": "model-00083-of-00108.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00083-of-00108.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00082-of-00108.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00082-of-00108.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00082-of-00108.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00082-of-00108.safetensors", + "model.layers.62.block_sparse_moe.experts.0.w1.weight": "model-00084-of-00108.safetensors", + "model.layers.62.block_sparse_moe.experts.0.w2.weight": "model-00084-of-00108.safetensors", + "model.layers.62.block_sparse_moe.experts.0.w3.weight": "model-00084-of-00108.safetensors", + "model.layers.62.block_sparse_moe.experts.1.w1.weight": "model-00084-of-00108.safetensors", + "model.layers.62.block_sparse_moe.experts.1.w2.weight": "model-00084-of-00108.safetensors", + "model.layers.62.block_sparse_moe.experts.1.w3.weight": "model-00085-of-00108.safetensors", + "model.layers.62.block_sparse_moe.gate.weight": "model-00084-of-00108.safetensors", + "model.layers.62.input_layernorm.weight": "model-00085-of-00108.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00085-of-00108.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00083-of-00108.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00084-of-00108.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00083-of-00108.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00084-of-00108.safetensors", + "model.layers.63.block_sparse_moe.experts.0.w1.weight": "model-00085-of-00108.safetensors", + "model.layers.63.block_sparse_moe.experts.0.w2.weight": "model-00085-of-00108.safetensors", + "model.layers.63.block_sparse_moe.experts.0.w3.weight": "model-00085-of-00108.safetensors", + "model.layers.63.block_sparse_moe.experts.1.w1.weight": "model-00086-of-00108.safetensors", + "model.layers.63.block_sparse_moe.experts.1.w2.weight": "model-00086-of-00108.safetensors", + "model.layers.63.block_sparse_moe.experts.1.w3.weight": "model-00086-of-00108.safetensors", + "model.layers.63.block_sparse_moe.gate.weight": "model-00085-of-00108.safetensors", + "model.layers.63.input_layernorm.weight": "model-00086-of-00108.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00086-of-00108.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00085-of-00108.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00085-of-00108.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00085-of-00108.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00085-of-00108.safetensors", + "model.layers.64.block_sparse_moe.experts.0.w1.weight": "model-00086-of-00108.safetensors", + "model.layers.64.block_sparse_moe.experts.0.w2.weight": "model-00087-of-00108.safetensors", + "model.layers.64.block_sparse_moe.experts.0.w3.weight": "model-00087-of-00108.safetensors", + "model.layers.64.block_sparse_moe.experts.1.w1.weight": "model-00087-of-00108.safetensors", + "model.layers.64.block_sparse_moe.experts.1.w2.weight": "model-00087-of-00108.safetensors", + "model.layers.64.block_sparse_moe.experts.1.w3.weight": "model-00087-of-00108.safetensors", + "model.layers.64.block_sparse_moe.gate.weight": "model-00086-of-00108.safetensors", + "model.layers.64.input_layernorm.weight": "model-00087-of-00108.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00087-of-00108.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00086-of-00108.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00086-of-00108.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00086-of-00108.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00086-of-00108.safetensors", + "model.layers.65.block_sparse_moe.experts.0.w1.weight": "model-00088-of-00108.safetensors", + "model.layers.65.block_sparse_moe.experts.0.w2.weight": "model-00088-of-00108.safetensors", + "model.layers.65.block_sparse_moe.experts.0.w3.weight": "model-00088-of-00108.safetensors", + "model.layers.65.block_sparse_moe.experts.1.w1.weight": "model-00088-of-00108.safetensors", + "model.layers.65.block_sparse_moe.experts.1.w2.weight": "model-00088-of-00108.safetensors", + "model.layers.65.block_sparse_moe.experts.1.w3.weight": "model-00089-of-00108.safetensors", + "model.layers.65.block_sparse_moe.gate.weight": "model-00088-of-00108.safetensors", + "model.layers.65.input_layernorm.weight": "model-00089-of-00108.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00089-of-00108.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00087-of-00108.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00088-of-00108.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00087-of-00108.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00088-of-00108.safetensors", + "model.layers.66.block_sparse_moe.experts.0.w1.weight": "model-00089-of-00108.safetensors", + "model.layers.66.block_sparse_moe.experts.0.w2.weight": "model-00089-of-00108.safetensors", + "model.layers.66.block_sparse_moe.experts.0.w3.weight": "model-00089-of-00108.safetensors", + "model.layers.66.block_sparse_moe.experts.1.w1.weight": "model-00090-of-00108.safetensors", + "model.layers.66.block_sparse_moe.experts.1.w2.weight": "model-00090-of-00108.safetensors", + "model.layers.66.block_sparse_moe.experts.1.w3.weight": "model-00090-of-00108.safetensors", + "model.layers.66.block_sparse_moe.gate.weight": "model-00089-of-00108.safetensors", + "model.layers.66.input_layernorm.weight": "model-00090-of-00108.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00090-of-00108.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00089-of-00108.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00089-of-00108.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00089-of-00108.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00089-of-00108.safetensors", + "model.layers.67.block_sparse_moe.experts.0.w1.weight": "model-00090-of-00108.safetensors", + "model.layers.67.block_sparse_moe.experts.0.w2.weight": "model-00091-of-00108.safetensors", + "model.layers.67.block_sparse_moe.experts.0.w3.weight": "model-00091-of-00108.safetensors", + "model.layers.67.block_sparse_moe.experts.1.w1.weight": "model-00091-of-00108.safetensors", + "model.layers.67.block_sparse_moe.experts.1.w2.weight": "model-00091-of-00108.safetensors", + "model.layers.67.block_sparse_moe.experts.1.w3.weight": "model-00091-of-00108.safetensors", + "model.layers.67.block_sparse_moe.gate.weight": "model-00090-of-00108.safetensors", + "model.layers.67.input_layernorm.weight": "model-00091-of-00108.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00091-of-00108.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00090-of-00108.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00090-of-00108.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00090-of-00108.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00090-of-00108.safetensors", + "model.layers.68.block_sparse_moe.experts.0.w1.weight": "model-00092-of-00108.safetensors", + "model.layers.68.block_sparse_moe.experts.0.w2.weight": "model-00092-of-00108.safetensors", + "model.layers.68.block_sparse_moe.experts.0.w3.weight": "model-00092-of-00108.safetensors", + "model.layers.68.block_sparse_moe.experts.1.w1.weight": "model-00092-of-00108.safetensors", + "model.layers.68.block_sparse_moe.experts.1.w2.weight": "model-00092-of-00108.safetensors", + "model.layers.68.block_sparse_moe.experts.1.w3.weight": "model-00093-of-00108.safetensors", + "model.layers.68.block_sparse_moe.gate.weight": "model-00092-of-00108.safetensors", + "model.layers.68.input_layernorm.weight": "model-00093-of-00108.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00093-of-00108.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00091-of-00108.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00092-of-00108.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00091-of-00108.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00092-of-00108.safetensors", + "model.layers.69.block_sparse_moe.experts.0.w1.weight": "model-00093-of-00108.safetensors", + "model.layers.69.block_sparse_moe.experts.0.w2.weight": "model-00093-of-00108.safetensors", + "model.layers.69.block_sparse_moe.experts.0.w3.weight": "model-00093-of-00108.safetensors", + "model.layers.69.block_sparse_moe.experts.1.w1.weight": "model-00094-of-00108.safetensors", + "model.layers.69.block_sparse_moe.experts.1.w2.weight": "model-00094-of-00108.safetensors", + "model.layers.69.block_sparse_moe.experts.1.w3.weight": "model-00094-of-00108.safetensors", + "model.layers.69.block_sparse_moe.gate.weight": "model-00093-of-00108.safetensors", + "model.layers.69.input_layernorm.weight": "model-00094-of-00108.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00094-of-00108.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00093-of-00108.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00093-of-00108.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00093-of-00108.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00093-of-00108.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00108.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00108.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00108.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00108.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00108.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00108.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00010-of-00108.safetensors", + "model.layers.7.input_layernorm.weight": "model-00011-of-00108.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00011-of-00108.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00010-of-00108.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00010-of-00108.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00010-of-00108.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00010-of-00108.safetensors", + "model.layers.70.block_sparse_moe.experts.0.w1.weight": "model-00094-of-00108.safetensors", + "model.layers.70.block_sparse_moe.experts.0.w2.weight": "model-00095-of-00108.safetensors", + "model.layers.70.block_sparse_moe.experts.0.w3.weight": "model-00095-of-00108.safetensors", + "model.layers.70.block_sparse_moe.experts.1.w1.weight": "model-00095-of-00108.safetensors", + "model.layers.70.block_sparse_moe.experts.1.w2.weight": "model-00095-of-00108.safetensors", + "model.layers.70.block_sparse_moe.experts.1.w3.weight": "model-00095-of-00108.safetensors", + "model.layers.70.block_sparse_moe.gate.weight": "model-00094-of-00108.safetensors", + "model.layers.70.input_layernorm.weight": "model-00095-of-00108.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00095-of-00108.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00094-of-00108.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00094-of-00108.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00094-of-00108.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00094-of-00108.safetensors", + "model.layers.71.block_sparse_moe.experts.0.w1.weight": "model-00096-of-00108.safetensors", + "model.layers.71.block_sparse_moe.experts.0.w2.weight": "model-00096-of-00108.safetensors", + "model.layers.71.block_sparse_moe.experts.0.w3.weight": "model-00096-of-00108.safetensors", + "model.layers.71.block_sparse_moe.experts.1.w1.weight": "model-00096-of-00108.safetensors", + "model.layers.71.block_sparse_moe.experts.1.w2.weight": "model-00096-of-00108.safetensors", + "model.layers.71.block_sparse_moe.experts.1.w3.weight": "model-00097-of-00108.safetensors", + "model.layers.71.block_sparse_moe.gate.weight": "model-00096-of-00108.safetensors", + "model.layers.71.input_layernorm.weight": "model-00097-of-00108.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00097-of-00108.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00095-of-00108.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00096-of-00108.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00095-of-00108.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00096-of-00108.safetensors", + "model.layers.72.block_sparse_moe.experts.0.w1.weight": "model-00097-of-00108.safetensors", + "model.layers.72.block_sparse_moe.experts.0.w2.weight": "model-00097-of-00108.safetensors", + "model.layers.72.block_sparse_moe.experts.0.w3.weight": "model-00097-of-00108.safetensors", + "model.layers.72.block_sparse_moe.experts.1.w1.weight": "model-00098-of-00108.safetensors", + "model.layers.72.block_sparse_moe.experts.1.w2.weight": "model-00098-of-00108.safetensors", + "model.layers.72.block_sparse_moe.experts.1.w3.weight": "model-00098-of-00108.safetensors", + "model.layers.72.block_sparse_moe.gate.weight": "model-00097-of-00108.safetensors", + "model.layers.72.input_layernorm.weight": "model-00098-of-00108.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00098-of-00108.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00097-of-00108.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00097-of-00108.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00097-of-00108.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00097-of-00108.safetensors", + "model.layers.73.block_sparse_moe.experts.0.w1.weight": "model-00098-of-00108.safetensors", + "model.layers.73.block_sparse_moe.experts.0.w2.weight": "model-00099-of-00108.safetensors", + "model.layers.73.block_sparse_moe.experts.0.w3.weight": "model-00099-of-00108.safetensors", + "model.layers.73.block_sparse_moe.experts.1.w1.weight": "model-00099-of-00108.safetensors", + "model.layers.73.block_sparse_moe.experts.1.w2.weight": "model-00099-of-00108.safetensors", + "model.layers.73.block_sparse_moe.experts.1.w3.weight": "model-00099-of-00108.safetensors", + "model.layers.73.block_sparse_moe.gate.weight": "model-00098-of-00108.safetensors", + "model.layers.73.input_layernorm.weight": "model-00099-of-00108.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00099-of-00108.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00098-of-00108.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00098-of-00108.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00098-of-00108.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00098-of-00108.safetensors", + "model.layers.74.block_sparse_moe.experts.0.w1.weight": "model-00100-of-00108.safetensors", + "model.layers.74.block_sparse_moe.experts.0.w2.weight": "model-00100-of-00108.safetensors", + "model.layers.74.block_sparse_moe.experts.0.w3.weight": "model-00100-of-00108.safetensors", + "model.layers.74.block_sparse_moe.experts.1.w1.weight": "model-00100-of-00108.safetensors", + "model.layers.74.block_sparse_moe.experts.1.w2.weight": "model-00100-of-00108.safetensors", + "model.layers.74.block_sparse_moe.experts.1.w3.weight": "model-00101-of-00108.safetensors", + "model.layers.74.block_sparse_moe.gate.weight": "model-00100-of-00108.safetensors", + "model.layers.74.input_layernorm.weight": "model-00101-of-00108.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00101-of-00108.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00099-of-00108.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00100-of-00108.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00099-of-00108.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00100-of-00108.safetensors", + "model.layers.75.block_sparse_moe.experts.0.w1.weight": "model-00101-of-00108.safetensors", + "model.layers.75.block_sparse_moe.experts.0.w2.weight": "model-00101-of-00108.safetensors", + "model.layers.75.block_sparse_moe.experts.0.w3.weight": "model-00101-of-00108.safetensors", + "model.layers.75.block_sparse_moe.experts.1.w1.weight": "model-00102-of-00108.safetensors", + "model.layers.75.block_sparse_moe.experts.1.w2.weight": "model-00102-of-00108.safetensors", + "model.layers.75.block_sparse_moe.experts.1.w3.weight": "model-00102-of-00108.safetensors", + "model.layers.75.block_sparse_moe.gate.weight": "model-00101-of-00108.safetensors", + "model.layers.75.input_layernorm.weight": "model-00102-of-00108.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00102-of-00108.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00101-of-00108.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00101-of-00108.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00101-of-00108.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00101-of-00108.safetensors", + "model.layers.76.block_sparse_moe.experts.0.w1.weight": "model-00102-of-00108.safetensors", + "model.layers.76.block_sparse_moe.experts.0.w2.weight": "model-00103-of-00108.safetensors", + "model.layers.76.block_sparse_moe.experts.0.w3.weight": "model-00103-of-00108.safetensors", + "model.layers.76.block_sparse_moe.experts.1.w1.weight": "model-00103-of-00108.safetensors", + "model.layers.76.block_sparse_moe.experts.1.w2.weight": "model-00103-of-00108.safetensors", + "model.layers.76.block_sparse_moe.experts.1.w3.weight": "model-00103-of-00108.safetensors", + "model.layers.76.block_sparse_moe.gate.weight": "model-00102-of-00108.safetensors", + "model.layers.76.input_layernorm.weight": "model-00103-of-00108.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00103-of-00108.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00102-of-00108.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00102-of-00108.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00102-of-00108.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00102-of-00108.safetensors", + "model.layers.77.block_sparse_moe.experts.0.w1.weight": "model-00104-of-00108.safetensors", + "model.layers.77.block_sparse_moe.experts.0.w2.weight": "model-00104-of-00108.safetensors", + "model.layers.77.block_sparse_moe.experts.0.w3.weight": "model-00104-of-00108.safetensors", + "model.layers.77.block_sparse_moe.experts.1.w1.weight": "model-00104-of-00108.safetensors", + "model.layers.77.block_sparse_moe.experts.1.w2.weight": "model-00104-of-00108.safetensors", + "model.layers.77.block_sparse_moe.experts.1.w3.weight": "model-00105-of-00108.safetensors", + "model.layers.77.block_sparse_moe.gate.weight": "model-00104-of-00108.safetensors", + "model.layers.77.input_layernorm.weight": "model-00105-of-00108.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00105-of-00108.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00103-of-00108.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00104-of-00108.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00103-of-00108.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00104-of-00108.safetensors", + "model.layers.78.block_sparse_moe.experts.0.w1.weight": "model-00105-of-00108.safetensors", + "model.layers.78.block_sparse_moe.experts.0.w2.weight": "model-00105-of-00108.safetensors", + "model.layers.78.block_sparse_moe.experts.0.w3.weight": "model-00105-of-00108.safetensors", + "model.layers.78.block_sparse_moe.experts.1.w1.weight": "model-00106-of-00108.safetensors", + "model.layers.78.block_sparse_moe.experts.1.w2.weight": "model-00106-of-00108.safetensors", + "model.layers.78.block_sparse_moe.experts.1.w3.weight": "model-00106-of-00108.safetensors", + "model.layers.78.block_sparse_moe.gate.weight": "model-00105-of-00108.safetensors", + "model.layers.78.input_layernorm.weight": "model-00106-of-00108.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00106-of-00108.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00105-of-00108.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00105-of-00108.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00105-of-00108.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00105-of-00108.safetensors", + "model.layers.79.block_sparse_moe.experts.0.w1.weight": "model-00106-of-00108.safetensors", + "model.layers.79.block_sparse_moe.experts.0.w2.weight": "model-00107-of-00108.safetensors", + "model.layers.79.block_sparse_moe.experts.0.w3.weight": "model-00107-of-00108.safetensors", + "model.layers.79.block_sparse_moe.experts.1.w1.weight": "model-00107-of-00108.safetensors", + "model.layers.79.block_sparse_moe.experts.1.w2.weight": "model-00107-of-00108.safetensors", + "model.layers.79.block_sparse_moe.experts.1.w3.weight": "model-00107-of-00108.safetensors", + "model.layers.79.block_sparse_moe.gate.weight": "model-00106-of-00108.safetensors", + "model.layers.79.input_layernorm.weight": "model-00107-of-00108.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00107-of-00108.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00106-of-00108.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00106-of-00108.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00106-of-00108.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00106-of-00108.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00108.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00108.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00108.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00108.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00108.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00108.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00012-of-00108.safetensors", + "model.layers.8.input_layernorm.weight": "model-00013-of-00108.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00013-of-00108.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00011-of-00108.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00012-of-00108.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00011-of-00108.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00012-of-00108.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00108.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00108.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00108.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00108.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00108.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00108.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00013-of-00108.safetensors", + "model.layers.9.input_layernorm.weight": "model-00014-of-00108.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00014-of-00108.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00013-of-00108.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00013-of-00108.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00013-of-00108.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00013-of-00108.safetensors", + "model.norm.weight": "model-00107-of-00108.safetensors" + } +}