diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,14183 @@ +{ + "metadata": { + "ParamSize": 885, + "ParamBytes": 68971290624.0, + "BitsPerParam": 4.500203998122676 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32768, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "acde86e3649685897159d6c9544a26b5" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32768, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "2b862279759a17a046c98b07fbc33086" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32768, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "25e841fc0c072186cdd76e253cbcc878" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "d9657099df116e259b7cf36621b5cac9" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "3ddf95f392effc5d0fedd6b236faaf43" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "07b43c716d86780c6b1d20b05cdef5ed" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "67b7d3a58a8f97e53f5c6315b4072af3" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "7ca3ad42f7b718b80f2fccd70a6cde05" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25214976, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32768, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 25165824 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 25190400 + } + ], + "md5sum": "6e860c9309483645cba9d0730312da9a" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5b189dfa45f32a231acc4b4b00009bae" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "3a5384e6b463058a027dae814a577923" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "880a7ee27e6c41c35292a8c990641659" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "b8314a848e6d1c137bc2bdc94337c4ef" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "2badbbd92340f3514e91d49750b80b8c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "a823f4eaf64a38992dd98dfdcee82afb" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "780bc69a90dbf7dd9b78bc0e21268fc9" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "db2a223265358c44956811e5a0849238" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "c870e202fb63547686551c108a0afa67" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "9e4139f7583993be9a6b61d4c326fa96" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "f7d8dd7cb5a46e5d02e3f2eff8a0058b" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "fb050e03b42bb38a589289622a67c5ad" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "ef14b5e844c72225258ec63c81b6282c" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4593dc84cc03455aeb2c834abc0f7269" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "85a4c26f2500e75b601937b038156279" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "cfeb9fdbca105815cc54c4bfe6a03dce" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "9e38b16a480496b81c439f201000516f" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "9f3302797b9270ef713a81b5cf14b2ba" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "eecc764e67816f629672ee8deec1665e" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0e55042b7a55ad59401ddf03da8b4dc6" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "11131dd1a4724e273ff0737132f4dd43" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "2934b1cca2c14a8d27c2995a7243796b" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "ff073cb193051ba2ad7b3748e081c880" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "cea7e358b7927d86b22bbf8cfbe11878" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "442eea7fc4b1672a49893668b143010d" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "f0b403986ce969224679718b064eb58a" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3a6b087fe1d4986ef902bd8757b53784" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "c4f1fb18bcde31f1a4aada95fa95ddb7" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "69fe0ba49a0c8bf37d07c4c0b2fb6f31" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "083daedd5d1aa067b4c34d2209b5b799" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "21e902babac37bf11da65da812b24115" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "7a07d2dde8cdb42c8ec00ac333421535" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9545be50985f91ee4c130dda03227c93" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "0949a8cdbee6d0b932b5a5abeb3e3d2f" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "6c1ff20a82199aec95da59fc287fe7f0" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "c68ac0ca356c95b23753390a795ed67f" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "3cf001bfd738669e6da3419b86ac979f" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "7f678cf2cb326fbdcfda74b6c703aeb8" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "f39a5c5e579ccb93b5592d4554b21d00" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8f979dd452795afea6c11947a7a19991" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "f0604afc618baa4a4339b38baee676fb" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "52f013189749098e6280d619cca02405" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "09ca4d218f81f0a4640c60c6dd779e27" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "336bb803ce9e3add71630e2745b1b494" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "5653cb072011aac07321d67c7390c161" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d03c907cb02bd40d57a55a4d3b8b3924" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "f00f1d0486759f931b8fc6ea5d250be6" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "20e6357c3a3e627aab72d76b7b5e100f" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "ef033635941918bf50b1cf1d584506ce" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "32c52880c403ebd14d2ff436c08003a1" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "b70dc0abbb1dd6ff0566b4c2a932dba7" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "73f319daf8a044fc6fcf6b0b413da613" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "494fe34aeccc00bf4bef514a14c57717" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "74cf84a879bf6a72c57043d85ba569ff" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "3d3eb40b23b2263b6468eb0ea33b8b78" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "b0cfa36aceacc6bbe4b138436709631d" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "797261ca0902a5959fc5dfc87a948154" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "9463f1d8448d25ed34c453443b03786f" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "cdea5dceec692e0ca2518f2f2a6522a4" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "343cd7557f18e368a7cdabc06b062812" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "f32a3a4a56bbd3e3a57010ce1e2f03a3" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "6b3e94ece6ede1d918ee312220837343" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "47bd7d201b231b26d28201cebc539b07" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "cf15441da3e8478f3464ebbabb592ce7" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "8b8b6ead973484cbe5dc823dc9101fac" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b849dc028134ac60bb67dec42c27ae0a" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "44b427c522a7128c6269f1a26997902e" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "e4c62de42f5184d4d5decec3d6f72fcf" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "2b0655ab901ae68e12651823616e22d3" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "085619e639cc4f207702c0fa74441c92" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "3e3e83a56541e316262c52e0e2e12fb6" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "696654dbc02125607dfa639a5337f2ee" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "6fe260f1d796e1a9f78d44cac4d41deb" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "6dffcaadeba185648a6a30959e044c5d" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "2971ab416afd24ba2ba5dcb8286a3010" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "4d115db8b50a01bbb186a9f4b1bbd227" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "545c728c9ea705a722e8162bd5209ae1" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "5fe77c7059dc883a752df4a83d021274" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b60fe75ce4b1ebc2b18746a423a2f827" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "318c77b19ff3d53339f68f46adec01e4" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "d7a42adf3525115b32698de4a43bf8e0" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "ac169d48ca162867af24b543c6dfa0c8" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "b4fbf09533ba8d122107357e71ddafc8" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "5cefe580bdc1d880cb0aa87eb61efa4b" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3caa97384aa49ba348e711d4381f0329" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "6ffa53cee7bb0fb91f667583704bd948" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "2d876fec856a9a254f9912b96cf38432" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "76c7e1191bf793b09b58d98e65fe4f74" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "29c86841f6385c8e380c212d027d501f" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "f15a362d3ff2536c0eb3bb3a7518e69c" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "437f461e6fa40ffec14a1f39d5ee1a73" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "6d9c8bbd19ba4babc447f0bde3c125f6" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "9abc11e064bec28e6b2a85944771c886" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "12c97ec4901a8c11242a40c3907a84fb" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "a453ba3ee3334b7641acdbb31c2929ed" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "47107c0d62e8fffde83f05374a8110c4" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "8bc0de504fe5495d7947a85331978a2f" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "fe99a46e0f81d46ef2fb8e2621379ac6" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "3169cc638fac88415960ec0aded72422" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "4b10b408c6e3b82dfa5ca26ee17941a0" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "a48c234560eeff99727f8d0d74f384d0" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "ff08c973f8e3bd2d42b699c185dd9c81" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "c785f19b2e6484f8b8d43b89c06b06f9" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "c5ad72748ae02cc237005d5429777fc0" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8649cd28752ac233f9f8f2573232817c" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "9537ead4c2bb89e598da4e070f43c89f" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "7d746e5fa780eea19b937931229dd987" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "11ccb7d9e095e645fb402119bfb26a20" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "3bcc30e91f533e015a48f277a9bd6ff0" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "aece2fe42034d82c713242f9494d5457" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "533f2c8883494767f9454299ec460e35" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "8fb60c791a20370a4af3dc8d23db000b" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "9b8bea9971da17cf96f616fc8c7da38f" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "2037c19179f6c404b0589ef46a46b423" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "a1aba9d22823e25f96bb739bfa36d4b3" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "a6259b196c8e1b2d1ffe8344cc7826b8" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "7137759ac118e7399717cc85e3cf36d9" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "674022ff3eae36dac40e9cc1d8eb8d91" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "23c7c089e2d2289ea90084964ddc6a05" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "bbda8e3c46f392a664647be84e40a2d0" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "97ac29a2631ae773b271b1f683e8a9f8" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "bd297e8c01daeed2f9f9fdc277288ff4" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "45f2574ac0bb2256e06274f327751f4c" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "fd9c8da0ff3a8356f1db23819775af33" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "3ec71e39cdb65a74c1050a4499b16d0d" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "399c4691e921ca99a4d6db6ef2dda3f6" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "97df8419d44406f936ebde234deb6e6a" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d47fcda971dd30af3f43cffcaaf9a59e" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "a8fd092687572bf2d589bb7f10fa7984" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "116182e8e2b5fffcb9b9438c68918435" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "557f5da078afeec50a3c233023ec561e" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "d33dacd0bcff279779e76c3d89155374" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "f90d72184037d5e8abebf456392489da" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "6e993f34aedb1c65af71a97160a8a076" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "15828b4da31f70213ecf8e6566a0b682" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "187154f52b57d0c3e7bbefc60d3aa4d9" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8a981d3bf97da8571f148d21abefe1e9" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "7a172c2c7772b2fb336711311f13202b" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "a12f723709fae754bb7d3d8e6d5980cd" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "e6c298f78ce5326a5a81c74903f21db4" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d74febb7fa2a7b4a614fbdc7522accac" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "05ec41d4423a9f913a32ea816984942e" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "cc65625cbcbdf5d7b5b49b103faf2c91" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "82fbdca08f39c83edcd9847207c1b107" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "5a72944315fa80dfec8e3a45e8c03102" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "202b588ec947e7f6bb4077f6cbc01919" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "2621200f8186e56439cbc1fddd6829d2" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "591cd993bbe04161182cf9a715679052" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "88c33e32de40433a99380cc909f5c04a" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4a3fa9ce7cf1c704eea04a4b6679d516" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "a6534dd6e3331bcc2ea45087f9792aae" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "62f16b25e51a8fa73e248e83706e229d" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "bc33f5953bd871e7d4a533a742a011d5" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "0aea5bdf0ce67a257255ccb200db30e2" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "1dcac878e62d9bb80a2d810160de3438" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "5dcf63f5ba4849a70bd969dd41b1a7a1" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0aa5bce721306097e1d884b988d50198" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "4e1d0d23320a413db320a4b88e1340be" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "8d23669275554d80f6b547a2fd780d19" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "4cc7bef0913efd7f23e9f512f400f1f9" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "32ba88d0992d3de601e27fcc48456272" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "c71a13ed41cf2e37c6b038eca30f5a35" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0cc39edc42515aa6c6f050bf5e3548b2" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "75d7cfa16717b4db3ff6784b991ad0f2" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "f1168746b0c2c8bf5b753513bb5ddfc2" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "90055ef5a09aa6cfdceaa266f1db0caf" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "909522b32eeba47569685f328c4dcfad" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "98a301eba03deb67eb6d98837a84b5af" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "b96528dcabaf26b689da379c5dda373e" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "75ca59ac7dacf7a3c3d9dfd2b445b64e" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "8f9bc4e0af7f3d92ce094e9ec6c08831" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "5e007f40f62389af7b99391db67d2301" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "78ac3007644020cb83d41ba47b1ebd61" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "27807ecdb82423aeb173ae9815a59e11" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "7da2c691f5d3847d2812231aa865fde6" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "6c0fcbecd8e2b5ae99137b62887e33f4" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "e7a7f0a53382f4a17b244e5ab444eb83" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "0b6023febac54060b68d9143ebf398e6" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "222f33614b1a9c2cb49b413dc94d3b70" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "c481b9c701054d0e23ce8d667739ad66" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "da4435228af521dd3d8a9f430e731380" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "26c4a7cd40aa4dc011a38a7feb56c464" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c17036c02206a9692c611b563da8afda" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "3f557f57a36e661a21514049095ebb9c" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "ba83c2c141cb6a3a387a1b322185e1cb" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "cd64d1054c03e63e6ad9ffdce88a1e4c" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "5f6af26268f34309b3fc36a19cece877" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "511069ca50f8e2c5381a6e61a491be5c" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "cbce28dbff39925c99c47a55ca2c30af" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "2b2a377c58176702e3fea4a8755b1724" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "6fa06abfd9da33bdefd941ca6601da15" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "5420f5e5b7120494a297b10cfb52c12c" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "22812b6dfef2ddc4cd99d4424475353e" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "3e8d6cfca5f54fc02ce7fce769b26a5d" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "4a69daf1bfb494fb7f331b2924c5d0ac" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "46b16e1bf37785534af3f356bcdfc7b5" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "b5504a59da80eea47670e73cfe251f58" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "af14070f0e50fd4755717b7cd965a860" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "da785ae422f17bbca94ee29416b27140" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "3894681325ec014340e2ccb31aad8ae2" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "350eede4cf32278362c8633f78391f7f" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "1ef1b3368faf809974bc30df70cdd7bd" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "1e3a950f7546ecd4121afc183ede8ab3" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "7f762ada5b6681e982cbc4b5db2cd6ad" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "6816756cb47ae7f945c9d4d3e67cbb4b" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "3a7a66b9c1f35f8bb07c6cacea88f9ce" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "e7d9e155da5729c52ff1dc724005c359" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "4264671161778c5ca04be3fceaebeb8e" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4dcaef8964f707d3d8cb9f6232ecea32" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "a359cc21822f21e01f40c62b7f44631d" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "6bddfddc6b563cfca18ef2734acf5d9d" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "4d524b4746483e94725ee0a10de1e43b" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "3ae0313557fd5b9fd2cd050ec2802358" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "b7f5821826589741bbc9c0d725c25bf8" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "59f52893f6a44df43bf36ac468bd9d9e" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "f3ee4fa565d4d696f81a69f9c5ccda49" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "d6a8e8eb33f09be7416b3ce4f2358076" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "ce50769f14c3a24a42b10a808f8fccf7" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "78847c0d1d93c60cbfa6d672a97f4432" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "08a7c3f77f1ac307e7cc62bebcbe4932" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "97b550321db3dc58897feb812ebde8bc" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8075ec975d5c50b99f628e687de9f789" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "c148c7e1e3023f8898b61ec6ff0af8ff" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "d52299ab8ae24448f7b525b1ea73581e" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "27a91e004349b23f45cdf733a0609c7a" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "fda4448eddfc4cc912b20129d46f0a77" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "1e7268619f84baa9c6de5e915de3f4d6" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d04589de0e53c5b876daa48a5c915df2" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "87243ded349fd72e4ff482cb6efdb2b9" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "2aacadff3297d834b3d6df9d34dd29db" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "7b0ad8622ba8a3cc59f2154e1a27b489" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "aa668325b4538cc4e38a5e62e5efa47d" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "ef253198818cd11a5b4f5d1c90310845" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "e89590e9c694a4cf19d67441003312d9" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "49d1d25bfb70b15ffdda487206543fcd" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "7f14f79847204e5170e0ab6b971868bc" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "33aa709fee749ef06bd04d102b1f77a5" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "4800bff4b4e1c2de4f3b171a1322ff0e" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "cc0ed4bfc99769de666d3ce2e30340f7" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "d4fabd00794a435bd496c395c651bb72" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5f569df863a2a3df365d983a6fe314e1" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.42.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "647542c3410a1b415cfcecded01ef3aa" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "9e684a7e88fbb6440f195832c5fee88b" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "037f651e8da7b4343c8875b7453897f6" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "392bb208f10f3a9d96aa5326c3647e66" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "23036a230e8eeb63d686251a94d58358" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "4388dbaf29fc9d7eedb483fac4be6b10" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "50569425956eca247756f986d89904ad" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "a21646d78e1ba032f3423cfcd2e4ef5a" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "e1a9ba98adc811d01f64ae544db4c726" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "8555da3d4b237bab51763ee27466a195" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "297cd01d8ea6e21bb1d0c93042cd8734" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "de9c3ecdba795a2c4ce44f4e628c3643" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3d7a574acc0b420a6ad00d82b1c56647" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.44.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "b18b451304a051b7c38fefe8347d0415" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "abfc62ecee44bda7a5fbfff37d5ea3bc" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "c01a570be1a6b254eeaad3fd8047c8da" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "791aca8f9a0f792665e6d15ed062edb5" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "9b823d9c58f07d767d4a7fafc6e9a686" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "816acb579f1e09def81643a3bd74f432" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b1a87ec096b479c793050e20d1afa0c3" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "484d295582005ef601b3c28f695423ea" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "03f0c66fbbe081dc43340af9dffadcb3" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "2feeddd97a3337a83347dc1313be22a2" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "484fd54f741dc23629f45246ed662668" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "cd069f84bb1cd765f9c1ae8e49d9b2a4" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0160879134557695237729c5e0dcf115" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.46.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "1948c3cc02efde9f0894a5e06e6a95a1" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "e055d16dfce04dd9155d9760853925a9" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "a8a92771b22a52ba7d67a75d58327c97" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "771b6a2910d970c3431e1456456c515f" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.47.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "24aaea07606090440812c6a91ebdaf8b" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "4e6c80e4191cfad102322109509cfcaf" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7d8037c99f7884d0b36f6bfb0a21e9f9" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "2c786bd644438dccf8f8d00cc698e638" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "f67e49b726c2c5868c67eef0c1614d52" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "615cbb9d46ee350884074fcb52a12c99" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "018d55189f6b894f1bf163fa38276429" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.48.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "425cc9f05ec5f70ddfe9aa0581dc925b" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "67ff757e5af209826aa462b9c701b8d0" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.47.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.48.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "986e7148b93ae6e218c72cbc142eb97d" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "21df0e0fbb2272da130a258dcd32601f" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "e527564eecd012383a76493b92544a13" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "28240939384abdb33e3aa301b6ba8667" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.49.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "c8d0092f873e2bcfebc710b2974215d5" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "5db3c2890b9ac209df14d8d3073a5ab4" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3511eee36dabd5d94d58a6d6ca9ce8e6" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "eda425de208b9b0fe881a836767d417b" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "c33a5dcb2e3a8d860f72e5327b934035" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "edbda621e2ba483e29c4ea0c2ab7cb61" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "ac86a9c6c3ed07cbc6efcc402fe9a762" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "ed4953d97ded6bad559b1de93a4427f9" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "276e1f360168e3df71972876718992c0" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.49.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "32bb8b457b099d6879493af949bdb91e" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "1641e7538de24ef41a7918c3f4bdd40d" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "1f6f48cea49d465a37f2ccf523fac99b" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "9986d565ce10a99e87d0f98a64ac5415" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.50.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "1a78099e4641b687ff9cebb6d60a6a9b" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "a24013b8a616156835b3be58c7c180fd" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "65bc296c25ba79642c3f409cb55a8808" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "8ca06987b8c694c041e78cc6765017ee" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "1d6a6a7b13281ed88ac0f68b130b66e9" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "fedc4fe37d07a4e604ea5f5dce174fdf" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "c83990213d859fe981eebd90736ad710" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.51.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "5d7bcc1fce02a22fb383d35b51b50b0e" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e2c173f62cb7fff66ea9fa23c9c5b045" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.50.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.51.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "60a47de39a459880fbb33a42eb6507fc" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "cbfd226ffc9593d256d88c38886648c1" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "525e42cd24325c6ed30120078a1fa758" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "a214730bc9eabca823d308efa64de470" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.52.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "27a51bf62d87979df1eafe827ef09e12" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "341a2c60b77573ceeeded16313e679ee" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ffa56a446892d607beef0e4773d6c6f7" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "c2ec1b399a654655f7563a508d6ca3e9" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "5a2713b556b1c5d8ce332d2a5c409969" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "a8388bef7e5d0971f0bedd519e3ae2fd" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "b3a5669a489f6f8e0c10b73dd9044bb0" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.53.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "154d7cf37d950a9008ba4cde124e6635" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "46559eedb87ceb40995ebba042b539ab" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.52.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.53.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "59c7481385bc25c45d4b2c9c70fd6e7c" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "724b380b7032283345c29b6c4d647f68" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "1c7ce8ba922158ea6f019df9deecadc1" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "8e0519bf76f1e99d7ca014e1ad820d40" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.54.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "9649501a432c2b12d5670613e874d46b" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "b0118a5947a76c087b79e2decc3596a7" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "94d59b8554ddb8ab6b2cb14974ad5dc8" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "dcb4e25d938479e13436340b6d3d2306" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "855c3bd9f1d1e322f03b8c91904007a5" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "6af68dd2cf83a214a68fcb9777e255c3" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "e2dc9b5e5270e80b37cfdb367f0c2901" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.55.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "87597266404abe6d20664fae5521ada1" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d048b50808a8005d6cd632b11af89fe4" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.54.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.55.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "b03c245b9649c59d5a01ec5a1b995b8c" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "d71755fe76503a64ff25c860062bf6d8" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "34b9f393950ad303c39e9cf349ca4749" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "c02b16fcf6e3c0b71fa7ab95d192695f" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.56.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "1f0ef801719a77dc9188170852033a71" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "d625ffec91fd4ad8bd29e1bcf2ecf953" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "2461d9950b39bc79f0ab53c4209cc0cd" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "465aac6dddaa9c58bb1069afd24c4111" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "72e7ffbdc1ba0ae2634289348c1d8728" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "f7e6de28e2966d106b8da517542d5295" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "bd21ea613e500a65d68c469246254459" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.57.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "d3750de3a9a2795445fbefa3d6f9bc18" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "fd84f3bbacb3479a6f461d2612e461c4" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.56.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.57.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "4729cd9f52ef954fadb88ffe6a312a87" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "7881ead0fdcc594f66a75522f880501e" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "76a765a00a7c3bad28314f813e37ade1" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "9cbc55c8366c3d174f6cab6e41b909f6" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.58.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "5009952ae8c7e107e697788c33f55523" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "445ea19cea487fa15c46b5def73f8fc9" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ba990d32a482c2b4759e55a8f5ce8efe" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "1a48080511d3085a81a1076450c4c985" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "7f3e1955ce15705e00f1ec79327a90d2" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "ace09b1885e3ceed27b952be107c8a5c" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "48d9be53aace4c6c59ea60a4cf63dddd" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.59.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "81cade3345a7ea4fb7c02135043b0a1b" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8a88d1640e78b61ff04e65f5cec5ad66" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.58.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.59.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "db957b3c6ff1405ce6d451324949d94c" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "05915e57b0f7b344730d1ce07d0b9ce6" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "ca93b791b19e2d217ac446069023ea2a" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d0206b7fb56e60508796019b1760386c" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "4c4dfe816746b02561c4ad4543219e8c" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "96b8070c624e052467eb11b4c0168cf0" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f2364d4ea5d9d903b1a7c6cebf771d0e" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "a24edea6f96554ef227328c8259a6d81" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "c06d90a962117442d30f05deb0abbf03" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "d4f55aa33387ccab7a0a1f9ef0587d99" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "e2daabe8d19be77929c576226d26197c" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.60.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "d421f73fdff42ae186b8f228cb694df0" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5f4535d5cfb521598cb0103579451d16" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.60.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "43f65f3c852f047ff46871ff68e26d83" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "84405db3cae3d902339d788ba5a438c2" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "c06df950826a52ea1a63056ccf2f9083" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "61bfb013523e44baaba094b70f088254" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.61.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "51434a0d5bf84e5b43c473aeb068e788" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "28b1842c0abc3c4574ec956b3a661bca" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d00e16f7fbb95fd351032e44ac5f47a3" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "7df5e0f75bc14a8ff0a55bfe2e275a09" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "47eb81d47788919899f2812eebfc2053" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "af8c189a8fea05eb0519e737bca3e0b1" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "7f5f38ccdcff82669c8cde2b81735dd7" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.62.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "c522fc134c09de210f48a72abe651bae" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4e630eb8f7e857b20d90c22c80e35e27" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.61.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.62.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "a93c5c65404f22b6f69a5d937c7fa635" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "ff19fa7a892bfb1cbebd2cb19c2b4e68" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "792c307fb56360a2f1e44c8012630bb8" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "4649a9c963321982795eae0e3dfa94c8" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.63.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "335365b9937737de77de158364b96cd3" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "db6db1724fec51c327f14a74015b9fd5" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9b813d7265a4b05c8d0dfdcfd290d821" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "e7a93b7b6457bb952f0b6c05ac27f88e" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "36954804b647585871f84d0dd2950bd6" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "15ce8690e40974fe9514c9e322a2c2d7" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d1720405a9d283f6992ed57317fc9c63" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.64.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "11a621e4d2bedaad5597e21261f408fd" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f1669ce81b988601ab3403f805eb632d" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.63.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.64.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "0ae23850fecbdb1bdc2533f9a9126600" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "7ab19ca70aa212ae766502ddc221dfa9" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "baeca721b7543c0ecac6ca213764d223" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "d93e3dea0ec17cf310f60d58d97c17c4" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.65.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "2f56099a9ddcb5d48a5bd3e88601e556" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.65.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "0b867bc5be14e85767579489af00389b" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d3df33ea284515e52187600099ad2e46" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "6a1310cb587cf6a13fc5793f444ecabf" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "faa730a607e0cb8b674de0431b5f7627" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "92c39f474956bb307ffe479f6be1eae7" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "cfb08fd214306bd5558bf9c39888c78a" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.66.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "5b4332a3f3d5efce622e1a5c39e92339" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "102c3133fd3924128aa86d97b47f3c3c" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.65.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.66.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "d3d12a552c8f9c13c008fc3385278295" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "40b56f80b96299fb0cfc904b713543b1" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "361bafbaf55cd11e89083a7af89055fe" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "3b1341069968b8647eee990cff7a2a54" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.67.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "f51878e988dc4ecbd05929f55aeb04d0" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.67.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "afc405398f0df23fad15f6669035b6bb" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "77c04c55e793aaf00c06375a37e7b2f6" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "0dbc05f03ae3ab87e0c526bd0cbf0592" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "fce619e5e582dec71a25a1c218c202ec" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "e3ebb436e90880aab48931ab01910348" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "28fdd59f23563d0bbc34fe15278cff00" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.68.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "87333dee8c1c545406da271ab0687fd1" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8ab8ad26d6d2f1962ac6f0946af1cc54" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.67.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.68.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "5226df99cc3562e483ec941edf4053c3" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "302936b14b951c1cbedecca90536c5c9" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "00bfb209305e90faa344b7973d0732a3" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "742b31522dac9e14c6c8b330ed3bb2dc" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.69.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "72d40c2d404f8d3dc6553c86360ba8de" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.69.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "7c26818b5d28a22de6001af2585c1e73" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f24c79507e3eef8e02740eb7ad59c89e" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "fdc958c7b2ccf6ba6a05ec0478653f7a" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "320c944f4af25f24ffcf711f60eede6a" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "f8f7db087c127ee4e826f034a089f5c0" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "903ea5678ab954d7ae7fec2e49970d95" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "defff342c66219147e76b4cf7f82c76a" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "448b16977c80c9cb0b6c2c32047a8310" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.69.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "9e62b5a0cf385c350f0510302eb41664" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "030c9d308b632f1cf98cca2b13cdd516" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "198acbe847600da66d4a6746cd18aef1" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "ca88131ce1602eb8ea4fa116edbd75d5" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.70.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "003afc71a40f3430250586254e3039a4" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.70.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "8f59ec414e618b1a292105201cdc7ba4" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "612149221b743c88e9adb376dd481783" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "9ba7eb055be43a5bfcb7a323c6476bf8" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "0010250d71ed8eebfb2acc79dfb85feb" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "506afd113be0fd8683e4ab6bca0cd125" + }, + { + "dataPath": "params_shard_455.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "610482b83547082e745db1ea43539bb9" + }, + { + "dataPath": "params_shard_456.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.71.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "8e6b85fa63bc2c0152a45c270d7c4583" + }, + { + "dataPath": "params_shard_457.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "546654e679a6e4475ea7713414104cb4" + }, + { + "dataPath": "params_shard_458.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.70.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.71.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "e307f969520722cdf736157210da5cc8" + }, + { + "dataPath": "params_shard_459.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "d63eea3c851b9b9986b46dabebf2dbdd" + }, + { + "dataPath": "params_shard_460.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "1b5c1b669b6995578025722395556d1d" + }, + { + "dataPath": "params_shard_461.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "f3413188dfc72599b446f084f7fcabcf" + }, + { + "dataPath": "params_shard_462.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.72.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "87dda0c098a9d8de9ee39539032e65a2" + }, + { + "dataPath": "params_shard_463.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.72.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "bae2276579a4a9ba2f638e9a608bef53" + }, + { + "dataPath": "params_shard_464.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5ae96d1b48cb30899e79741617c0d450" + }, + { + "dataPath": "params_shard_465.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "e12bbfcbab2abebfe0917865dfb9f604" + }, + { + "dataPath": "params_shard_466.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "41b5fe301649b0f71e5d66328ae1f4ac" + }, + { + "dataPath": "params_shard_467.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "6b9f75191ac370591d1c22b1e92cd744" + }, + { + "dataPath": "params_shard_468.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "427d89137a2dcfe5bcabf2d82ea252db" + }, + { + "dataPath": "params_shard_469.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.73.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "ccdfa9eac9e029377bf27ebf7238968f" + }, + { + "dataPath": "params_shard_470.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "876ead615aca0736b0c1b8722ab0258e" + }, + { + "dataPath": "params_shard_471.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.72.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.73.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "e416417b35e02573144faa23570e8c3f" + }, + { + "dataPath": "params_shard_472.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "7b2bf395cf0d40c4079e5549a80455ca" + }, + { + "dataPath": "params_shard_473.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "a155456a6beabac7d535300f5e8944f1" + }, + { + "dataPath": "params_shard_474.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "34a6028d11ffbd706958378a347068b5" + }, + { + "dataPath": "params_shard_475.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.74.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "8ca5178d946a68cf484cd54ea6a8d685" + }, + { + "dataPath": "params_shard_476.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.74.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "1a44de9b0275ac30dad3e1ccca01c94e" + }, + { + "dataPath": "params_shard_477.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "1095e0e816d9b89c35db5185f9363c8e" + }, + { + "dataPath": "params_shard_478.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "5153811359d72e9bfbfd1812ce390b5a" + }, + { + "dataPath": "params_shard_479.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "0ff50116f4b43e0d0460d11eecd65bf5" + }, + { + "dataPath": "params_shard_480.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "a1410ffd697c2cabc41bcb7a1112cab3" + }, + { + "dataPath": "params_shard_481.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "5833074149609d670d784f270b5f1c70" + }, + { + "dataPath": "params_shard_482.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.75.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "6121ee7cd0a9fba984e483cd6e3f419a" + }, + { + "dataPath": "params_shard_483.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "06a2afa80b44aaa4d99c4a857c3fa240" + }, + { + "dataPath": "params_shard_484.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.74.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.75.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "cf181be381d5afa607cc81cc51a6d23a" + }, + { + "dataPath": "params_shard_485.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "84d645f716e8583ed7ec6d422aa8b3de" + }, + { + "dataPath": "params_shard_486.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "7d1264f4b370f756fa6e6a7ae93789e3" + }, + { + "dataPath": "params_shard_487.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "9ed5f87d3dcd6f661fc2286516d444f2" + }, + { + "dataPath": "params_shard_488.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.76.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "a0c6c5b7a86848612a95ce4beb5e4700" + }, + { + "dataPath": "params_shard_489.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.76.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "5cf50fb303ad80e310de71d132fc9af5" + }, + { + "dataPath": "params_shard_490.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f451099bf5855ba6015c878a480cd898" + }, + { + "dataPath": "params_shard_491.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "6022bed892518650329e9ec8db268b15" + }, + { + "dataPath": "params_shard_492.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "a124800cd20060d91e3cdf44b98e40b9" + }, + { + "dataPath": "params_shard_493.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "38af286c3b5bff1c2f8f411d488971b6" + }, + { + "dataPath": "params_shard_494.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "2a8e2f45e892ef56dcd6ccce520c99b6" + }, + { + "dataPath": "params_shard_495.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.77.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "0f8739a56572c6554a47511aeb0b5d97" + }, + { + "dataPath": "params_shard_496.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "983435539409add5d3f95f8a5f2f82fe" + }, + { + "dataPath": "params_shard_497.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.76.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.77.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "bdc82fa7fd508a2ab83f87a049afc798" + }, + { + "dataPath": "params_shard_498.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "fe94209604ae406302830dfd7c42192d" + }, + { + "dataPath": "params_shard_499.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "d01dd68d6b00cfa8baa3bb5fa23809f8" + }, + { + "dataPath": "params_shard_500.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "13aed7a6f1df5e68fc1b2f783471757d" + }, + { + "dataPath": "params_shard_501.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.78.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "e58b9ee8bd263cb9a767adb89409a32d" + }, + { + "dataPath": "params_shard_502.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.78.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "14ddcce10804a6430b253681d3dce5ba" + }, + { + "dataPath": "params_shard_503.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d679ffa6121787733ca270a1d986c9aa" + }, + { + "dataPath": "params_shard_504.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "11a90d8481b0c9f37b3d4b84696d0a2b" + }, + { + "dataPath": "params_shard_505.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "60eafdeb63b48ea8631840f5336d33ee" + }, + { + "dataPath": "params_shard_506.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "8dd033bc2d69af771afe787090e29f32" + }, + { + "dataPath": "params_shard_507.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "345f598826cadd0cc9f8fc8577d49d81" + }, + { + "dataPath": "params_shard_508.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.79.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "ff23ccfff9ff36491acade3cf7a26fee" + }, + { + "dataPath": "params_shard_509.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ea96938d1e083d0b86c93e9b3ea02509" + }, + { + "dataPath": "params_shard_510.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.78.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.79.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "088e72a5dbe8cf0586c682d8350cf08c" + }, + { + "dataPath": "params_shard_511.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "f5a6a3c2703eee7e9cc3cf091c64dc83" + }, + { + "dataPath": "params_shard_512.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "ef84bfae091ab5978e78cc09fbed82f3" + }, + { + "dataPath": "params_shard_513.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "5d9538a607589587466bc22a711b71d6" + }, + { + "dataPath": "params_shard_514.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "70dba92dbb998f78211e9f68ed19ea86" + }, + { + "dataPath": "params_shard_515.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "7aab43b6e7e7d696e00b643755e83c95" + }, + { + "dataPath": "params_shard_516.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b0b43bd860598944db3bdac2db59aa8c" + }, + { + "dataPath": "params_shard_517.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.80.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "54c8d2507e0b31cfc1f9812eb1b454fe" + }, + { + "dataPath": "params_shard_518.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.80.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "2fba90cac91e9713d6ecfd24cd0ff6df" + }, + { + "dataPath": "params_shard_519.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.80.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "3726c0880bc6e9508d7c6a74b012dbca" + }, + { + "dataPath": "params_shard_520.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.80.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "42c74b93e644be10ab57620b89af7eaf" + }, + { + "dataPath": "params_shard_521.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.80.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "b2a42106034be27259257c0e7b9d23b0" + }, + { + "dataPath": "params_shard_522.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.80.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "74d6b60a66bbaa1db6c824a1d87b60c3" + }, + { + "dataPath": "params_shard_523.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.80.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.80.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.80.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "37f1cd25901994a489b82f663707bb2d" + }, + { + "dataPath": "params_shard_524.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.81.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "36aca4289a11c4797ba082e9d0d98b0f" + }, + { + "dataPath": "params_shard_525.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.81.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "73203e7892631ee7048d8d34ed0f0a56" + }, + { + "dataPath": "params_shard_526.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.81.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "7293521c83661ba8eb8e88e4fa2f6ff7" + }, + { + "dataPath": "params_shard_527.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.81.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "45c5c1d7cd2959c099c925dc5aa142bc" + }, + { + "dataPath": "params_shard_528.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.80.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.81.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.81.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.81.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "e1feb248b4b44567d04435b072afcb48" + }, + { + "dataPath": "params_shard_529.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.81.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d2a0680dff79a47131a3c7db2fcf8bd8" + }, + { + "dataPath": "params_shard_530.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.82.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "ba90f1a961352bac141d01e43536f811" + }, + { + "dataPath": "params_shard_531.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.82.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "f3cddc3970c2631a5f71bd124a9648d2" + }, + { + "dataPath": "params_shard_532.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.82.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "7e58b32e0a11bd22445626a1baf30782" + }, + { + "dataPath": "params_shard_533.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.82.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "af8bdbaceaef2e2fa9412e7f17eafc0a" + }, + { + "dataPath": "params_shard_534.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.82.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "2d51fc54e40460e8dff8a6c63a870772" + }, + { + "dataPath": "params_shard_535.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.82.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "19a76f9cd443bd0187ba4f94b806d1f6" + }, + { + "dataPath": "params_shard_536.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.81.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.81.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.82.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.82.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.82.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "0a34619516a56fdf9108c8c651077c22" + }, + { + "dataPath": "params_shard_537.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.83.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "22ca56d53b0140ea0ae41483f2d6f684" + }, + { + "dataPath": "params_shard_538.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.83.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "efee417315f08231589b5b07be19e0f5" + }, + { + "dataPath": "params_shard_539.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.83.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "5dc5415059ae68298228778cb75e3e85" + }, + { + "dataPath": "params_shard_540.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.83.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "894f4109e5afe736b012d5b2deac3e4f" + }, + { + "dataPath": "params_shard_541.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.82.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.83.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.83.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.83.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "d799a9e48831db44f1cb25567f51f495" + }, + { + "dataPath": "params_shard_542.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.83.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "300d55ebe98adac959220d72095a7802" + }, + { + "dataPath": "params_shard_543.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.84.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "164d0ebc2a9d15a0ea8ce392b436120a" + }, + { + "dataPath": "params_shard_544.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.84.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "4dff9bbfc03a7eebb7f22f1fb69510cc" + }, + { + "dataPath": "params_shard_545.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.84.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "4297550c8ae90cb2a04e5d9bf940f511" + }, + { + "dataPath": "params_shard_546.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.84.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "c32de68885fc2d10483336b80a1ef5aa" + }, + { + "dataPath": "params_shard_547.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.84.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "bb8460ed209be6d08b7c368463f38845" + }, + { + "dataPath": "params_shard_548.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.84.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a8f68fbfc7d48a0da89305c78b73610c" + }, + { + "dataPath": "params_shard_549.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.83.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.83.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.84.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.84.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.84.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "11430c6207c5558fedd0899a90c2c78d" + }, + { + "dataPath": "params_shard_550.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.85.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "f3dbdb9c16d89200ef2685449345cc94" + }, + { + "dataPath": "params_shard_551.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.85.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "6b266d11f0c310f4bcae813bdf3a5b66" + }, + { + "dataPath": "params_shard_552.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.85.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "38cfc96562a6b1a34ab3d9875837e5c2" + }, + { + "dataPath": "params_shard_553.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.85.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "e6a55f6d921c718a0b1486ea092ebef1" + }, + { + "dataPath": "params_shard_554.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.84.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.85.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.85.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.85.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "d28ed7cc362f3a665a8094267e168c83" + }, + { + "dataPath": "params_shard_555.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.85.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3d87672e208fc18f5b0b07fd30aec7b4" + }, + { + "dataPath": "params_shard_556.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.86.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "df30798d95ea28c37601fc57537eb4f9" + }, + { + "dataPath": "params_shard_557.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.86.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "fa6edd2f2dabcd35d8aa927d03c19216" + }, + { + "dataPath": "params_shard_558.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.86.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "8cbdedca39299e834790cb507c13e28f" + }, + { + "dataPath": "params_shard_559.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.86.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "dc0481b01523da127e49b39540dc883a" + }, + { + "dataPath": "params_shard_560.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.86.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "65cce8c6d008a2b4beb1721dd2f409e2" + }, + { + "dataPath": "params_shard_561.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.86.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c7710a76f817449b8cc7ded82ae418d0" + }, + { + "dataPath": "params_shard_562.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.85.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.85.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.86.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.86.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.86.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "e777e294d139e533a48bc339e25ced36" + }, + { + "dataPath": "params_shard_563.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.87.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "e37a6ad5e7f3fd6853c617772f32676d" + }, + { + "dataPath": "params_shard_564.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.87.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "696f2cb5534db6af3ca02c1a86760c95" + }, + { + "dataPath": "params_shard_565.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.87.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "eb04788e38f00f0b1f4d76d65ee5856a" + }, + { + "dataPath": "params_shard_566.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.87.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "b162b50fdc2c2365715b8bb74af69eea" + }, + { + "dataPath": "params_shard_567.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.86.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.87.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + }, + { + "name": "model.layers.87.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 9461760 + }, + { + "name": "model.layers.87.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 31481856 + } + ], + "md5sum": "b1bfa1410b10b0280398f18c7494ec64" + }, + { + "dataPath": "params_shard_568.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.87.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c1d2942161fbe42f3bf33895138bc3e0" + }, + { + "dataPath": "params_shard_569.bin", + "format": "raw-shard", + "nbytes": 176160768, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 12288, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 176160768, + "byteOffset": 0 + } + ], + "md5sum": "fb34876a008651ca760b3a5d67e2bbed" + }, + { + "dataPath": "params_shard_570.bin", + "format": "raw-shard", + "nbytes": 22020096, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 12288, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 22020096, + "byteOffset": 0 + } + ], + "md5sum": "8679e82fd29223d2a4ff86ba4f4c9d4d" + }, + { + "dataPath": "params_shard_571.bin", + "format": "raw-shard", + "nbytes": 352321536, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 352321536, + "byteOffset": 0 + } + ], + "md5sum": "609fcc277b4762b192d84ac3ad02634e" + }, + { + "dataPath": "params_shard_572.bin", + "format": "raw-shard", + "nbytes": 44040192, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44040192, + "byteOffset": 0 + } + ], + "md5sum": "bce2349bf0a64ef134ff08fbf2a872c8" + }, + { + "dataPath": "params_shard_573.bin", + "format": "raw-shard", + "nbytes": 88080384, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 14336, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 88080384, + "byteOffset": 0 + } + ], + "md5sum": "ec04009c74f1ece17b8d9da1e8e4531c" + }, + { + "dataPath": "params_shard_574.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 12288, + 1536 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a6e67ca5ecb997c5f4ea0c656d1e6838" + }, + { + "dataPath": "params_shard_575.bin", + "format": "raw-shard", + "nbytes": 31506432, + "records": [ + { + "name": "model.layers.87.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 0 + }, + { + "name": "model.layers.87.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11010048 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20447232 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 20471808 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 14336, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11010048, + "byteOffset": 20496384 + } + ], + "md5sum": "c5ba800ee77c1517d0c380bf998c55b6" + }, + { + "dataPath": "params_shard_576.bin", + "format": "raw-shard", + "nbytes": 9461760, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 12288, + 384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.norm.weight", + "shape": [ + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24576, + "byteOffset": 9437184 + } + ], + "md5sum": "4cbb498898b66be4a3a69cce067fd5e2" + } + ] +} \ No newline at end of file