diff --git "a/ndarray-cache-b16.json" "b/ndarray-cache-b16.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache-b16.json" @@ -0,0 +1,3651 @@ +{ + "metadata": { + "ParamSize": 260, + "ParamBytes": 11181772800.0, + "BitsPerParam": 32.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 257556480, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 50304, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 257556480, + "byteOffset": 0 + } + ], + "md5sum": "99eb0ede31a902b93147feb99d9bd47d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 257556480, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 50304, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 257556480, + "byteOffset": 0 + } + ], + "md5sum": "1420f5360de15c3a6f20ce7769219710" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "70ecf379e529f532e184a114fa5f7c5f" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ee1ad9994c5f70603a9e8c9b183d808e" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ac38295359989cb809773c375a2678ae" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "e1c1ec2ab45426cd7e4f26505c50a778" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b583b8645e7384a1216a8184115fa2f2" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "35954e59ad9beb75790cd262d64dc677" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "02d233e2e4cdbea8d043ff441cb09aa7" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7566580833a0a4445f2041f65fcc7e66" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "9935d5d7f8c20a9e22b0c3c011f5c6b9" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 26275840, + "records": [ + { + "name": "model.layers.0.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 5120 + }, + { + "name": "model.layers.0.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 10240 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 15360 + }, + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 20480 + }, + { + "name": "model.layers.1.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "model.layers.1.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13143040 + }, + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13148160 + }, + { + "name": "model.layers.10.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26255360 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26260480 + }, + { + "name": "model.layers.10.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26265600 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26270720 + } + ], + "md5sum": "fe1af670d2514a5ff1cd60cc3ae3610b" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "568387cc5e4071a81d719bd1315dc46f" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7b8ce4f2a356f8fd77811e50ee3494d7" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "60be05c22284c2e5afbaf31f12c0f227" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "df84937ec8af3f250774a20843018acd" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e142f525e885f7653a2260ec17d304e3" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4c509399fdf16dbf48f26edb6c759114" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.11.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.11.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.12.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.12.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "f2e3f540212bca3f4021d2220196b1c4" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5869255516b1bdc16ee7b009a7558e61" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8390fb3651f6fafa43e6d13250da459b" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "c7d80658bad27a2075ee2909fbd45c3d" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "39531349b116301e1a534eee9d5cda18" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5f09cac29ba1c85827122cf43405db26" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "86a03a42b444179917b62c49c8299a8f" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.13.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.14.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.14.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "9068a4ac81157c0b8b62cdd5fe66f4a2" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "3c0b80880233f0ff152e4ac482eecd80" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7645abb618c7898d9b8c9ee5e2154b56" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "49b8b1e8adc93dcf0093e7e8b2db4f57" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "52934a944067c74144b12dc9432441b9" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6af97de98960425df08a492010542aad" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "705332bb90492da14aa53e33ec6f1fb1" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.15.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.16.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.16.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "68f6dcca292e731bd8f2b48de4bd170f" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "39e7225f77dbc9b4a45474ad0c19525a" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "902d9adf4fa729bbafae05769fec711e" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "53f3ebdb365e59d52e64ee950750f7b7" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2012ff05090ef906647a7652cdd16fbc" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9df1fcac9475d8074e4080f71ccc2c4b" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "acd9b665f7a3dfdb0a964bea34692eef" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.17.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.18.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.18.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "4c97193f1661f62b7b34a83821ed89f4" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "bee738becb48f282a724c6d10264222c" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c00819fb6e1e5561cca84732e8c688f3" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "908e4f1882b297e80572f2c6b3c71be1" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "9ca8cbbcb9f8cb7e9aaeab45c236d6ed" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7a7b92df65ab0131a90b6375955acba2" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "c2ed32867ee9e210ce73781bd3df651c" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.19.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.2.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.2.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "a431061920da8aa07191ad1ef06ae585" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2cf015c1421c06236ea0e414c579c158" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c8a809f8b260662991eadf2c89afed0e" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "eb442f5cbe2dd408a042ff70b6d1d78b" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "a127d63d90fbdcf0295a6314a59c38a0" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4ac4dbe8e38573eeec2fc511e8fcfe55" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "6a63aeb2ed620bebeb8d14a5db9dc50d" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.20.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.21.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.21.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "04c4ab7b78313131abcc066f8b8f08d1" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "3e13d352a63d0c2ab24dd92776d9b9d4" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "89416d3251bc5ab021d4cde8389df226" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ad60f34e9470803a590668d38f571cdf" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "4130d4f06d3af7fdd00060bd66f9e9c0" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e8191fc3196b458a2682d8001a6a6a62" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "307aff9e249c46c654d486542f66319f" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.22.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.23.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.23.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "2cc1fd120e8f0b22966e0f28167f3840" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "a4c25bce39dd1a29c6f03c3651ca568c" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1dd537b0185667f3e562b6eb24e35257" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "9d993ab727558107c9105553865ec33f" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "3a9f4dea298cb5fc728cbc114d9e0a08" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1567f5f202134f1b1c872f8bd60f3cc0" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "375f6bc8930133672f1184d261df3db9" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.24.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.25.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.25.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "b9af6516f2a8fd68e32f6f39c242d9f8" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "696f254d8ce817a88bd0314b0000ad0b" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c81f42fe2250842393845dda77908bad" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "438b307c84f6ab42eb14bffe65395de7" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "7517251998df10c79e89b38e3c65c45e" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "72dfc3068787875d939c848b304fb151" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b9112ad1aa82c521bd4016390ffffbce" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.26.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.27.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.27.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "f7b5468922320ec7fd16fe1a8de71e67" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "b44012f0911bd7a6eaf3afe8c620d773" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7ed42bf7d604bd9bb931bbc76f3b0876" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b8414fe2548b6461e53d96e5ada1f06c" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c2d18e497f0e7dedaefc5821de33a2f6" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fe0caaa9c12491490e0cab979afbfbde" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "508ef01762e88879118270e32663b0b5" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.28.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.28.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.29.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.29.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "10356d54769c1cf4daf6d0d061e7597c" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "8a108425106fe155e99ba85b8feeb0c7" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "109335d942b6b32472d978fca6588fb8" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "84e4df98dea25864908845db2a24a59a" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "ba41ab48227b018fd1ac9d8f4b907552" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b8631ee37d36999dae3cfad9289afc1e" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1fd45eb4e9cec596381ff42208720900" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.3.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.3.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.30.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.30.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "7f61fad719562a9d2ad0a0b85ed57a2a" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "33947ce2249c4b5ffaf46fd7d67c5cf6" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b7de03cd37f1c00920ada1a37ed7d3f1" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "7e0c3cc7e71f02cbf653e1274ef5a8e5" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "f5f2724c28985d25a65242b42ee0ac2f" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4f7de00252f2fe522d59366d5d9f9afc" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "7c6c5740a6068dd6859ada709e37cb90" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.31.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.4.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.4.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "893f7edce6b6c9a9dcdec992491854ad" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "f52a6c84a519d353902ff607c518f070" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "74a260d6e729686df3d796b17118c9de" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "07a2a84cfc4b5d3a53bebc8c0868160e" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "7b48aeb51b310a06fdf40f43b6b8d0c1" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "734f8079c35b12db55465490310212fb" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "085e9c636cc0d78ccf0d7805348a3061" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.5.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.6.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.6.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "d15703d436cfeb81200d84e323d5e6b3" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "370c6a2766d203f1dafb567ecee6d851" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "04dd3bcc76eccadc6235fa8ab8cc3236" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "6a624b50b66b6b48f90e1313da7bfbe6" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "7fb3fc9f444dc78b7400ff30e2efba94" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7984731a769cdd06a6f3f5d7dfbc2a04" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "34d89066ad1a714f3eabfb6c5fc2bb7d" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.7.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.8.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.8.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "b4ca7a650917185cc46521475941d514" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "aee760bf032dec0a6a340953ee7731a7" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "beb2232023d1f19219e26a51a1b92afd" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "caec1303a946754faffccc92b1254e4f" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 26245120, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.9.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.norm.bias", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.norm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5120, + "byteOffset": 26240000 + } + ], + "md5sum": "dbfb209f08c627381e1ee1069911ad04" + } + ] +} \ No newline at end of file