|
{ |
|
"metadata": { |
|
"ParamSize": 170, |
|
"ParamBytes": 17075361792.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1572864000, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
256000, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c60082db0e4d8f35e7289fd7de2d6953" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f4e2e963889085e8afe6bc6c49f5572" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "14fb73c20a944f08d1ca3fbc249a4dcb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86a00af176de654eddef55171a294f02" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f95cbe4ce39a600c2e2ea3b4a79d3506" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58c5b02dbdfb6f4ad7dc07a987487d81" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f824e83e1c09800fed5d9e501cbef9ed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b4b1df277cfbc3b70d620be9b0fd82f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa05d7ae7d4eb220f37747d841c39f41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a04a7a8dfba3225e5c9e4d08f5d991c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69642bf15d15e8e3d0a50df2c87b3c79" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1bd0562941d5344f2f2981de3eda76b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5241c2d095d69394ec4ea68ba6f23f9e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b1b08abf4a0d04a65bf68736d8673f7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6510c5f58389931a2d5e1943a4f6aa2e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ba676881606031a5ee0b74e33fb88de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "40d0b41abee65ea73e3a4c82a6fa65c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f3187d1dc182a8f95ceab7df293cb152" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0a1d0e2b8358cd2b4856c73c250dd3a3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa2041a5903e8b1079fa61a8cd603429" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "09e715068b771738ffa53ace091b7f0b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4054e3e3ba3296ba506ab84288192da9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "194504b7b8ab52be14b2581a772ba2d1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "19d6c58020599c295a7c72322240d15f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "02f69e4867691d3b47d95927a56854ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ae680ce108a1e6d457056c82ae18bcb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "547cbecf7fd4ee5e09053084470211c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78a402b92e7a0b524531fc7229cfe3eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bbe991c38a3544a1a32286d300efdf3d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eff50d4dc40d27ff7f0b45d9361ee8fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1e65975a0acf68aa3dead36688fae7c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66d5d789c9a68fc43f0432dee2025af1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "21b8e6c6330437b35c12bac84fc5aa55" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "297b687fc59ee54860e1e086a189d049" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "64c4395080e11b49919f611fac8f5bd0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2168bb7b517618736468495b64de6492" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7601fdd9f5c4172ad568a88b92780d4f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6dced480e397d0e4b0f4e229e59168b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b7a2901c6cb2f4e1a84b2559bee2c205" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "668d44bcf32609f674422885adbb2568" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f7f06099c20e9561b85bbbd71083a9d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "36e8659919fcc7c0e9cd15bf7ee20638" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69272f810a63454b7e493ca8db2e4e01" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "97ec6c6098706829215b9775c002b32b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b79bfda3ec65213084ac12ceee29f33" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b807f3ab603cd37a846676d193f9d277" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e5dd0bf094a56e3d175d2fdf163b8b7d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b5bcc5621ca7a57eb12c483feeb43307" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8217f638eeb1e46fe3868fd8e8bf100e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dba591e829aac359033b6e918382810e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dbc527f7fa51b6884d6653380f293d2a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1d1a0bb4f98bd51126c57fe77684dce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f58da53e30d94e45a0a369e0a1240ee2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1e27f4032aabc28bbe3cd547f8e186d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "408fc336ff7f18a7c83fb7fa603a4998" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "538df5b59596e2902efe2bc0af1568b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e814405b553d7ec957cad8f3d616ebc7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3f81f8bc3e1646d20574a7bf8d96b0d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e1a1953eb2ccac5af2d7590e6d2b5ec5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce270223bc558db0a5d27af2116fba53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "59e777a792b4913307bfda4cf4075a71" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c7a7d717f3b9f832dcaef9749a97c74f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "385607fea47c6f19a9fd39de55901040" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c7c9818761d650fea693ab80b98a9d1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48ea87d4d016f945999069724103aac9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0ff42527fb75bd5513d8c18ab7c59d98" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2d87584e9446fb1ebf02c1f1709f3ab1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a952eddcef19e87acba07a874b8f37e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "12fda1b30881783dbe2e19b782ad94fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b7afa8bf48f008e0ebed5abcd2f15ea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "260f73d27ed48ba8a642b899eec416bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d65ef1f2796ef68e935d6014ca5ba1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3171884795345165c875daa55a17fe94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2cfb567bc6879de46040a7f8680d8066" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c1f33a25629a88883fd6c1526fda4a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dcd7f2a5e7045ff5cf5f581832219172" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fbaa14547db7f71b1885972de54cf52a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b73203d60d4898d6f198982b94ef17f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "206abc6ecd1f274098f872f4881e3cca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4428c7ab551722a23e8d07042b9f88d1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00c3310f6c68029e3681e6e29acea1f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6e8f193ec13343dd3282e3c72cf16498" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "84e566f57f2c1c687fe22e5a709c75d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b4e188c7aab9c318bc26bc00fa6eaa7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b956e65718e312b8f64d65480a7eb73" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa1eba42e21c4a3a209ed123807a5cc5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9c593249fc8574d8d11888f36a4ebc8f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9a9767c0741c14fb34c278847729ca41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fefa508ad96e075de33755d3e77c0b02" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "272780eda760a1b632ee0898bd822bed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0c1787fdf9ada9a3cbc2615a3557dc6e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0549b7e823aae895993f740fb5f92d1f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "947ff6f419800cdad216c95bb6eae64b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c03fc0850c2564e0c0b7c2420601f1e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a6ff8555eede8310413c7f0edc374b2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3f9577771286399bee6784def87c1a18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cf1fbf37bec4183e93eef194cc87cc26" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "31edc9bcc3145aece6c8ee2d7b0583d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a90e87939fe6ca58ddf0de8ebd52c62e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c0b0e83fa9c6d85c2b92efc97bb9dcd6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74f19307deb63ec97ff3aa8f5e1ac00a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9d88cc3c3265e7b073f039e020e3fba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3832849eba8cbf73f87dead864a76d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a51bb7fd741c29ef91dd4fadc787e03d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2ea4aeb190ee39979e1099d1856a93c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23e6ae0d6b3145458e7285adee8c25e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cd7a693ca49d3883fb57d5064f5aaa5d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4e8b4c437c8a8c1893756bfe27dd50f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 150994944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
24576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 150994944, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b592e53fa6d6c73c73924b4ba2944b56" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 301989888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
49152, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 301989888, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c6d781f9c489a4bab30b084193c37b1c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
12288, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "984e89674ec1c07fc87a9a8b3a3e72bb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad1ef7e668a5d8ded355ef2621df9998" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25516032, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 6144 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.weight", |
|
"shape": [ |
|
3072, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 12288 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25178112 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25184256 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25190400 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25196544 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25202688 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25208832 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25214976 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25221120 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25227264 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25233408 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25239552 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25245696 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25251840 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25257984 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25264128 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25270272 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25276416 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25282560 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25288704 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25294848 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25300992 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25307136 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25313280 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25319424 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25331712 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25337856 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25344000 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25350144 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25356288 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25362432 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25368576 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25374720 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25380864 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25387008 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25393152 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25399296 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25405440 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25411584 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25417728 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25423872 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25430016 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25436160 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25442304 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25448448 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25454592 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25460736 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25466880 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25473024 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25479168 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25485312 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25491456 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25497600 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25503744 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25509888 |
|
} |
|
], |
|
"md5sum": "e8b4f06390ce04a29734c710dfe87aa4" |
|
} |
|
] |
|
} |