stablelm-zephyr-3b-q0f32-MLC / ndarray-cache-b16.json
CharlieFRuan's picture
Initial commit
8790ec4 verified
raw
history blame
121 kB
{
"metadata": {
"ParamSize": 260,
"ParamBytes": 11181772800.0,
"BitsPerParam": 32.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 257556480,
"records": [
{
"name": "lm_head.weight",
"shape": [
50304,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 257556480,
"byteOffset": 0
}
],
"md5sum": "99eb0ede31a902b93147feb99d9bd47d"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 257556480,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
50304,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 257556480,
"byteOffset": 0
}
],
"md5sum": "1420f5360de15c3a6f20ce7769219710"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "70ecf379e529f532e184a114fa5f7c5f"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ee1ad9994c5f70603a9e8c9b183d808e"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "ac38295359989cb809773c375a2678ae"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "e1c1ec2ab45426cd7e4f26505c50a778"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b583b8645e7384a1216a8184115fa2f2"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "35954e59ad9beb75790cd262d64dc677"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "02d233e2e4cdbea8d043ff441cb09aa7"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7566580833a0a4445f2041f65fcc7e66"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "9935d5d7f8c20a9e22b0c3c011f5c6b9"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 26275840,
"records": [
{
"name": "model.layers.0.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 5120
},
{
"name": "model.layers.0.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 10240
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 15360
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 20480
},
{
"name": "model.layers.1.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13127680
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13132800
},
{
"name": "model.layers.1.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13137920
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13143040
},
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13148160
},
{
"name": "model.layers.10.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26255360
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26260480
},
{
"name": "model.layers.10.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26265600
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26270720
}
],
"md5sum": "fe1af670d2514a5ff1cd60cc3ae3610b"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "568387cc5e4071a81d719bd1315dc46f"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7b8ce4f2a356f8fd77811e50ee3494d7"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "60be05c22284c2e5afbaf31f12c0f227"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "df84937ec8af3f250774a20843018acd"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e142f525e885f7653a2260ec17d304e3"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "4c509399fdf16dbf48f26edb6c759114"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.11.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.12.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.12.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "f2e3f540212bca3f4021d2220196b1c4"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "5869255516b1bdc16ee7b009a7558e61"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "8390fb3651f6fafa43e6d13250da459b"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "c7d80658bad27a2075ee2909fbd45c3d"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "39531349b116301e1a534eee9d5cda18"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5f09cac29ba1c85827122cf43405db26"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "86a03a42b444179917b62c49c8299a8f"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.13.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.14.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.14.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "9068a4ac81157c0b8b62cdd5fe66f4a2"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3c0b80880233f0ff152e4ac482eecd80"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7645abb618c7898d9b8c9ee5e2154b56"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "49b8b1e8adc93dcf0093e7e8b2db4f57"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "52934a944067c74144b12dc9432441b9"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6af97de98960425df08a492010542aad"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "705332bb90492da14aa53e33ec6f1fb1"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.15.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.15.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.16.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.16.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "68f6dcca292e731bd8f2b48de4bd170f"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "39e7225f77dbc9b4a45474ad0c19525a"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "902d9adf4fa729bbafae05769fec711e"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "53f3ebdb365e59d52e64ee950750f7b7"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2012ff05090ef906647a7652cdd16fbc"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "9df1fcac9475d8074e4080f71ccc2c4b"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "acd9b665f7a3dfdb0a964bea34692eef"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.17.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.17.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.18.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.18.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "4c97193f1661f62b7b34a83821ed89f4"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bee738becb48f282a724c6d10264222c"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c00819fb6e1e5561cca84732e8c688f3"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "908e4f1882b297e80572f2c6b3c71be1"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9ca8cbbcb9f8cb7e9aaeab45c236d6ed"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7a7b92df65ab0131a90b6375955acba2"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "c2ed32867ee9e210ce73781bd3df651c"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.19.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.2.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.2.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "a431061920da8aa07191ad1ef06ae585"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2cf015c1421c06236ea0e414c579c158"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c8a809f8b260662991eadf2c89afed0e"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "eb442f5cbe2dd408a042ff70b6d1d78b"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a127d63d90fbdcf0295a6314a59c38a0"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4ac4dbe8e38573eeec2fc511e8fcfe55"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "6a63aeb2ed620bebeb8d14a5db9dc50d"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.20.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.21.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.21.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "04c4ab7b78313131abcc066f8b8f08d1"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3e13d352a63d0c2ab24dd92776d9b9d4"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "89416d3251bc5ab021d4cde8389df226"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "ad60f34e9470803a590668d38f571cdf"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "4130d4f06d3af7fdd00060bd66f9e9c0"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e8191fc3196b458a2682d8001a6a6a62"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "307aff9e249c46c654d486542f66319f"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.22.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.23.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.23.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "2cc1fd120e8f0b22966e0f28167f3840"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a4c25bce39dd1a29c6f03c3651ca568c"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "1dd537b0185667f3e562b6eb24e35257"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "9d993ab727558107c9105553865ec33f"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3a9f4dea298cb5fc728cbc114d9e0a08"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "1567f5f202134f1b1c872f8bd60f3cc0"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "375f6bc8930133672f1184d261df3db9"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.24.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.25.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.25.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "b9af6516f2a8fd68e32f6f39c242d9f8"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "696f254d8ce817a88bd0314b0000ad0b"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c81f42fe2250842393845dda77908bad"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "438b307c84f6ab42eb14bffe65395de7"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7517251998df10c79e89b38e3c65c45e"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "72dfc3068787875d939c848b304fb151"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "b9112ad1aa82c521bd4016390ffffbce"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.26.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.27.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.27.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "f7b5468922320ec7fd16fe1a8de71e67"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.28.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b44012f0911bd7a6eaf3afe8c620d773"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7ed42bf7d604bd9bb931bbc76f3b0876"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "b8414fe2548b6461e53d96e5ada1f06c"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.29.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c2d18e497f0e7dedaefc5821de33a2f6"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "fe0caaa9c12491490e0cab979afbfbde"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "508ef01762e88879118270e32663b0b5"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.28.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.28.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.28.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.29.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.29.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "10356d54769c1cf4daf6d0d061e7597c"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8a108425106fe155e99ba85b8feeb0c7"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "109335d942b6b32472d978fca6588fb8"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "84e4df98dea25864908845db2a24a59a"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.30.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "ba41ab48227b018fd1ac9d8f4b907552"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b8631ee37d36999dae3cfad9289afc1e"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "1fd45eb4e9cec596381ff42208720900"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.3.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.30.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.30.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "7f61fad719562a9d2ad0a0b85ed57a2a"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.31.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "33947ce2249c4b5ffaf46fd7d67c5cf6"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b7de03cd37f1c00920ada1a37ed7d3f1"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "7e0c3cc7e71f02cbf653e1274ef5a8e5"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f5f2724c28985d25a65242b42ee0ac2f"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4f7de00252f2fe522d59366d5d9f9afc"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "7c6c5740a6068dd6859ada709e37cb90"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.31.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.31.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.31.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.4.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.4.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "893f7edce6b6c9a9dcdec992491854ad"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f52a6c84a519d353902ff607c518f070"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "74a260d6e729686df3d796b17118c9de"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "07a2a84cfc4b5d3a53bebc8c0868160e"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7b48aeb51b310a06fdf40f43b6b8d0c1"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "734f8079c35b12db55465490310212fb"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "085e9c636cc0d78ccf0d7805348a3061"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.5.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.6.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.6.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "d15703d436cfeb81200d84e323d5e6b3"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "370c6a2766d203f1dafb567ecee6d851"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "04dd3bcc76eccadc6235fa8ab8cc3236"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "6a624b50b66b6b48f90e1313da7bfbe6"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7fb3fc9f444dc78b7400ff30e2efba94"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7984731a769cdd06a6f3f5d7dfbc2a04"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "34d89066ad1a714f3eabfb6c5fc2bb7d"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 26255360,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.7.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.layers.8.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
},
{
"name": "model.layers.8.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26245120
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26250240
}
],
"md5sum": "b4ca7a650917185cc46521475941d514"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2560,
6912
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "aee760bf032dec0a6a340953ee7731a7"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
13824,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "beb2232023d1f19219e26a51a1b92afd"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
7680,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "caec1303a946754faffccc92b1254e4f"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 26245120,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.9.input_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13107200
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13112320
},
{
"name": "model.layers.9.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13117440
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2560,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 13107200,
"byteOffset": 13127680
},
{
"name": "model.norm.bias",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26234880
},
{
"name": "model.norm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 26240000
}
],
"md5sum": "dbfb209f08c627381e1ee1069911ad04"
}
]
}