| { |
| "metadata": { |
| "ParamSize": 485, |
| "ParamBytes": 15201289216.0, |
| "BitsPerParam": 3.6039124214610974 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 104960000, |
| "records": [ |
| { |
| "name": "lm_head.q_weight", |
| "shape": [ |
| 32000, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 104960000, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "10faa78d7756e848d896a56b1c98705a" |
| }, |
| { |
| "dataPath": "params_shard_1.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.41.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e530efdd94679456777a075747dfd17a" |
| }, |
| { |
| "dataPath": "params_shard_2.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.42.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a69628a16c8929cc7aa7cd7211343fe1" |
| }, |
| { |
| "dataPath": "params_shard_3.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.42.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "89b39dba6e464a38466aa4bfdcdb8bd9" |
| }, |
| { |
| "dataPath": "params_shard_4.bin", |
| "format": "raw-shard", |
| "nbytes": 18053120, |
| "records": [ |
| { |
| "name": "model.layers.42.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e8cf13d2b90dbfa7fe5cf797402d6105" |
| }, |
| { |
| "dataPath": "params_shard_5.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.42.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3872a5860f7d3465631b6cd8f717c6a4" |
| }, |
| { |
| "dataPath": "params_shard_6.bin", |
| "format": "raw-shard", |
| "nbytes": 31240704, |
| "records": [ |
| { |
| "name": "lm_head.q_scale", |
| "shape": [ |
| 32000, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 13120000, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.41.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 13120000 |
| }, |
| { |
| "name": "model.layers.41.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 13136384 |
| }, |
| { |
| "name": "model.layers.41.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 22163968 |
| }, |
| { |
| "name": "model.layers.42.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 22180352 |
| }, |
| { |
| "name": "model.layers.42.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 22196736 |
| }, |
| { |
| "name": "model.layers.42.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 31224320 |
| } |
| ], |
| "md5sum": "ea9c491a1b303ebe0f63e53b0c84487c" |
| }, |
| { |
| "dataPath": "params_shard_7.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.42.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.42.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "5e3cdbe153845d46e4232114de8262bf" |
| }, |
| { |
| "dataPath": "params_shard_8.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.43.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e2aaf9332100001ff1565308a7476660" |
| }, |
| { |
| "dataPath": "params_shard_9.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.43.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2e55c97f23b4bfd899b850a95cd9c85e" |
| }, |
| { |
| "dataPath": "params_shard_10.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.43.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b8abd9022fb6e8739b4cc9a34b905997" |
| }, |
| { |
| "dataPath": "params_shard_11.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.42.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.43.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.43.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.43.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.43.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "b9efd91d2a1d0c32202822593b833ab2" |
| }, |
| { |
| "dataPath": "params_shard_12.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.43.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.43.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "f79e8725d3e57730d039350337328b26" |
| }, |
| { |
| "dataPath": "params_shard_13.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.44.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6642ff0c55403ae67bb3675c82f27272" |
| }, |
| { |
| "dataPath": "params_shard_14.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.44.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3cf631526bdc2e96440202dc80a088ad" |
| }, |
| { |
| "dataPath": "params_shard_15.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.44.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "afe8d1908340ae1285b8fb8bd6eac5ac" |
| }, |
| { |
| "dataPath": "params_shard_16.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.43.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.44.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.44.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.44.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.44.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "af160e96b5dbf2977a9861a4f80ed498" |
| }, |
| { |
| "dataPath": "params_shard_17.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.44.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.44.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "6017ece322d1eb87d2699f82c381c9fd" |
| }, |
| { |
| "dataPath": "params_shard_18.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.45.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5638b01427bf2f5e035ad0ed06874b57" |
| }, |
| { |
| "dataPath": "params_shard_19.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.45.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b4d47609e35c8520cac56d173c223332" |
| }, |
| { |
| "dataPath": "params_shard_20.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.45.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a2d91a383bce6a7a6c3db1000d86f384" |
| }, |
| { |
| "dataPath": "params_shard_21.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.44.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.45.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.45.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.45.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.45.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "9b35c7a1c513f42b1a610bb04ceaca23" |
| }, |
| { |
| "dataPath": "params_shard_22.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.45.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.45.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "cc95df81e7a5296b0c0e0849682aa46c" |
| }, |
| { |
| "dataPath": "params_shard_23.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.46.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f5721723ec549a6fe5a94a6a355a6476" |
| }, |
| { |
| "dataPath": "params_shard_24.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.46.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2b3db2bf7b48447aa62eef77518f2d00" |
| }, |
| { |
| "dataPath": "params_shard_25.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.46.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "54ab76e92776d2b7b9c89c235a2c55e0" |
| }, |
| { |
| "dataPath": "params_shard_26.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.45.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.46.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.46.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.46.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.46.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "2fab169cc48a014a4de3b55b4050369a" |
| }, |
| { |
| "dataPath": "params_shard_27.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.46.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.46.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "28b62462e538c54cfc9a1549cf983f9c" |
| }, |
| { |
| "dataPath": "params_shard_28.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.47.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "73c5e7116d52590ed1977fc55c7e2298" |
| }, |
| { |
| "dataPath": "params_shard_29.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.47.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d7f359b6f4aa83bbd504d3e048b07492" |
| }, |
| { |
| "dataPath": "params_shard_30.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.47.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fd88e19f5b3e46448111af1172534aa1" |
| }, |
| { |
| "dataPath": "params_shard_31.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.46.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.47.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.47.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.47.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.47.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "3ffac08c9f8fa4477eb4c1b12a9f74d2" |
| }, |
| { |
| "dataPath": "params_shard_32.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.47.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.47.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "43f0d4a1c681ea10fa33e1dce44b20f2" |
| }, |
| { |
| "dataPath": "params_shard_33.bin", |
| "format": "raw-shard", |
| "nbytes": 104960000, |
| "records": [ |
| { |
| "name": "model.embed_tokens.q_weight", |
| "shape": [ |
| 32000, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 104960000, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1712a1a3d527f18da9752a3d47d9ae5e" |
| }, |
| { |
| "dataPath": "params_shard_34.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b188e0e53bd6483f31816e6daff55c3e" |
| }, |
| { |
| "dataPath": "params_shard_35.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4ae3521c24c6c2f9f49187e8dab9967e" |
| }, |
| { |
| "dataPath": "params_shard_36.bin", |
| "format": "raw-shard", |
| "nbytes": 18053120, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "db360401c56023298a5891a9887eb625" |
| }, |
| { |
| "dataPath": "params_shard_37.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3ec6f421a2b8a2b6aa034e8dafb318d2" |
| }, |
| { |
| "dataPath": "params_shard_38.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3df19b984bb9d9c6a548eb83c515dabc" |
| }, |
| { |
| "dataPath": "params_shard_39.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3f3dade7bc570037bb958590afa1df7e" |
| }, |
| { |
| "dataPath": "params_shard_40.bin", |
| "format": "raw-shard", |
| "nbytes": 33128960, |
| "records": [ |
| { |
| "name": "model.layers.47.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.norm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.embed_tokens.q_scale", |
| "shape": [ |
| 32000, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 13120000, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.0.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 16495104 |
| }, |
| { |
| "name": "model.layers.0.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 16511488 |
| }, |
| { |
| "name": "model.layers.0.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 25539072 |
| }, |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 25555456 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 29753856 |
| }, |
| { |
| "name": "model.layers.1.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 33112576 |
| } |
| ], |
| "md5sum": "984c020511e0f5a270998ff34630e720" |
| }, |
| { |
| "dataPath": "params_shard_41.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "80e1a97d709b7584a1208497e62c41e7" |
| }, |
| { |
| "dataPath": "params_shard_42.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7048e633b41f527cf96a95c47e23aef5" |
| }, |
| { |
| "dataPath": "params_shard_43.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0eb6986a21da42cdabd8b6d3f688d41b" |
| }, |
| { |
| "dataPath": "params_shard_44.bin", |
| "format": "raw-shard", |
| "nbytes": 31295488, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 9027584 |
| }, |
| { |
| "name": "model.layers.1.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 27080704 |
| }, |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 27097088 |
| } |
| ], |
| "md5sum": "023d27fb88c2d6fd80f9a1eaf1816cc0" |
| }, |
| { |
| "dataPath": "params_shard_45.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1353ab3cb6d3a4a31a07a1ca75b62659" |
| }, |
| { |
| "dataPath": "params_shard_46.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cb93e92f8d23d43c0de04a8ba345e3a6" |
| }, |
| { |
| "dataPath": "params_shard_47.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bedd11857298eb2edcfedee3272fc95f" |
| }, |
| { |
| "dataPath": "params_shard_48.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.2.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.2.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.2.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "e372512f2793bb89328af70096cec9d4" |
| }, |
| { |
| "dataPath": "params_shard_49.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "0749109230fadb6143a0cb7e4f876782" |
| }, |
| { |
| "dataPath": "params_shard_50.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "568950f4917df0831d4945d3c22d8fc6" |
| }, |
| { |
| "dataPath": "params_shard_51.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "70ea2905744d67f08b2d859e98064503" |
| }, |
| { |
| "dataPath": "params_shard_52.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "62bb93780a1d00ac8c238685fd0f354c" |
| }, |
| { |
| "dataPath": "params_shard_53.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.3.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.3.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.3.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "3346ee36772a18a2c9c9da59fa132912" |
| }, |
| { |
| "dataPath": "params_shard_54.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "8ad6ff183811a2bfba004616f902651b" |
| }, |
| { |
| "dataPath": "params_shard_55.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4c2160b6d9c116189a09df7ddf6d8433" |
| }, |
| { |
| "dataPath": "params_shard_56.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "95ac980a6fd8026b117d16e5e5ce84ec" |
| }, |
| { |
| "dataPath": "params_shard_57.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c9b40908da992f1c535fdbe09f6ba989" |
| }, |
| { |
| "dataPath": "params_shard_58.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.4.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.4.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.4.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "2f616b3dcf6246243ac4710c23e3a6dc" |
| }, |
| { |
| "dataPath": "params_shard_59.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "c70eb1ab4552a4fe7b335ddf0d7a1201" |
| }, |
| { |
| "dataPath": "params_shard_60.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "07e09df31af8624a0c43c3a30e151476" |
| }, |
| { |
| "dataPath": "params_shard_61.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "75d60f7a56e17e3c4097ca0089855223" |
| }, |
| { |
| "dataPath": "params_shard_62.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9999f8c3ef21bf43464ac54dfcc25b75" |
| }, |
| { |
| "dataPath": "params_shard_63.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.5.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.5.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.5.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "4f056ced48deacca1b1c3c98af5bab44" |
| }, |
| { |
| "dataPath": "params_shard_64.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "d0ae3ea266bb04606d03d5f451edb4da" |
| }, |
| { |
| "dataPath": "params_shard_65.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f001d928e303e36cc0772bc992bd76d4" |
| }, |
| { |
| "dataPath": "params_shard_66.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a67a23002f4032504878974e73ff4cdd" |
| }, |
| { |
| "dataPath": "params_shard_67.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4391b1996a8a02c832b14e6a75cb422d" |
| }, |
| { |
| "dataPath": "params_shard_68.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6a437e5beab168eebc1dd6b3382b3c6e" |
| }, |
| { |
| "dataPath": "params_shard_69.bin", |
| "format": "raw-shard", |
| "nbytes": 28985344, |
| "records": [ |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 21411840 |
| }, |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 25610240 |
| }, |
| { |
| "name": "model.layers.10.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 28968960 |
| } |
| ], |
| "md5sum": "0b9158701246b7f5972edeba0a54cef2" |
| }, |
| { |
| "dataPath": "params_shard_70.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "646d224c5ed9091e14af6f77af24c194" |
| }, |
| { |
| "dataPath": "params_shard_71.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4789a689efc3f0f84dae2dad280b4990" |
| }, |
| { |
| "dataPath": "params_shard_72.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "909cc57bf4c0efa304caef9c1dbf1e4c" |
| }, |
| { |
| "dataPath": "params_shard_73.bin", |
| "format": "raw-shard", |
| "nbytes": 31295488, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 9027584 |
| }, |
| { |
| "name": "model.layers.10.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 27080704 |
| }, |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 27097088 |
| } |
| ], |
| "md5sum": "48e845a3a1d9717032e5302cd839d0d3" |
| }, |
| { |
| "dataPath": "params_shard_74.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "92aa8796bd2dccc0c9de4c5565feee4b" |
| }, |
| { |
| "dataPath": "params_shard_75.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "56cffebd8ac7fc418936e41d9dc2e4ff" |
| }, |
| { |
| "dataPath": "params_shard_76.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3d285d47e55102776094bdcd8d09be42" |
| }, |
| { |
| "dataPath": "params_shard_77.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.11.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.11.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.11.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "ee36783d25323d9b16b1d6d073bb875d" |
| }, |
| { |
| "dataPath": "params_shard_78.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "452b17c563e43200f1ba22f3b4ce7ee7" |
| }, |
| { |
| "dataPath": "params_shard_79.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1a47dace39e336e24327a520eebf7876" |
| }, |
| { |
| "dataPath": "params_shard_80.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c81c3f4b0dbe5b20aa7d8bd26fe67c00" |
| }, |
| { |
| "dataPath": "params_shard_81.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1d6f01346839d4ea362ce1f22d14edd9" |
| }, |
| { |
| "dataPath": "params_shard_82.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.12.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.12.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.12.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "869ce509b3ed87566a6e94cb2a2e062d" |
| }, |
| { |
| "dataPath": "params_shard_83.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "3d1635a14591979cb61cd90b751321de" |
| }, |
| { |
| "dataPath": "params_shard_84.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c9513a2c1a45d3cdfabe9ada359d11a4" |
| }, |
| { |
| "dataPath": "params_shard_85.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9dfdd814e9520209d85450da6bf0a5c5" |
| }, |
| { |
| "dataPath": "params_shard_86.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a1a5a349598039cefea30310c243b42d" |
| }, |
| { |
| "dataPath": "params_shard_87.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1482867b0a4ed3e60c7b54e43fe8d43c" |
| }, |
| { |
| "dataPath": "params_shard_88.bin", |
| "format": "raw-shard", |
| "nbytes": 28985344, |
| "records": [ |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 21411840 |
| }, |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 25610240 |
| }, |
| { |
| "name": "model.layers.6.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 28968960 |
| } |
| ], |
| "md5sum": "c672d2f158bfdfd50f841a502ffa4784" |
| }, |
| { |
| "dataPath": "params_shard_89.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "62398de8b64c52545edeaa745e5eb7ef" |
| }, |
| { |
| "dataPath": "params_shard_90.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "71c7eaecb745871008814b16d23b29b0" |
| }, |
| { |
| "dataPath": "params_shard_91.bin", |
| "format": "raw-shard", |
| "nbytes": 18053120, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "549bc3ed531cdbe500c8e6f4cdc84aea" |
| }, |
| { |
| "dataPath": "params_shard_92.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3234389c165d78b147bc99f4bfaac69b" |
| }, |
| { |
| "dataPath": "params_shard_93.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d17dad8e71661cc412e007db8f2b9b04" |
| }, |
| { |
| "dataPath": "params_shard_94.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9363a0ef375c7adf001b507c11a9fdb4" |
| }, |
| { |
| "dataPath": "params_shard_95.bin", |
| "format": "raw-shard", |
| "nbytes": 25677824, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.6.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 9027584 |
| }, |
| { |
| "name": "model.layers.7.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 9043968 |
| }, |
| { |
| "name": "model.layers.7.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 9060352 |
| }, |
| { |
| "name": "model.layers.7.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 18087936 |
| }, |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 18104320 |
| }, |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 22302720 |
| }, |
| { |
| "name": "model.layers.8.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 25661440 |
| } |
| ], |
| "md5sum": "770afd5452c6a3d53944d94b0c60efbe" |
| }, |
| { |
| "dataPath": "params_shard_96.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "149bbf31effe9cff64719dc88846a55a" |
| }, |
| { |
| "dataPath": "params_shard_97.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5d3758f04ac7439e09f9ef934595f4ee" |
| }, |
| { |
| "dataPath": "params_shard_98.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b3fa065039c81b0c62f082fc894cc55e" |
| }, |
| { |
| "dataPath": "params_shard_99.bin", |
| "format": "raw-shard", |
| "nbytes": 31295488, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 9027584 |
| }, |
| { |
| "name": "model.layers.8.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 27080704 |
| }, |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 27097088 |
| } |
| ], |
| "md5sum": "2a294f35877f62e7b7a32a82e3dfe8d4" |
| }, |
| { |
| "dataPath": "params_shard_100.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f32df20a9c47ec5f6f1aa6b970816d04" |
| }, |
| { |
| "dataPath": "params_shard_101.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "912e31be61c9d20f46d2528413e9d65c" |
| }, |
| { |
| "dataPath": "params_shard_102.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c7a1afb5bd261dcd99c310327fa43a16" |
| }, |
| { |
| "dataPath": "params_shard_103.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.9.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.9.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.9.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "919a6370adcede15bf362be1ece4e1b3" |
| }, |
| { |
| "dataPath": "params_shard_104.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "17eae7a05bc5ae7640ad0bc3d1df5d95" |
| }, |
| { |
| "dataPath": "params_shard_105.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cb56135ea54a60cbdad20fd103dc9d9d" |
| }, |
| { |
| "dataPath": "params_shard_106.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0e9ae00ea48b35ae5d602c728494379e" |
| }, |
| { |
| "dataPath": "params_shard_107.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3e220187fc7129e7775e5848379e705e" |
| }, |
| { |
| "dataPath": "params_shard_108.bin", |
| "format": "raw-shard", |
| "nbytes": 18053120, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "30700064cedc05436056335e5d0d173e" |
| }, |
| { |
| "dataPath": "params_shard_109.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fa93b2a3307ff8869e7886c83c61bca7" |
| }, |
| { |
| "dataPath": "params_shard_110.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "777a964e476b31f216de2915ed3a49ae" |
| }, |
| { |
| "dataPath": "params_shard_111.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3714f6bff641e153c859bce7697a8c97" |
| }, |
| { |
| "dataPath": "params_shard_112.bin", |
| "format": "raw-shard", |
| "nbytes": 29052928, |
| "records": [ |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.13.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.13.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.13.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.14.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 12419072 |
| }, |
| { |
| "name": "model.layers.14.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 12435456 |
| }, |
| { |
| "name": "model.layers.14.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 21463040 |
| }, |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 21479424 |
| }, |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 25677824 |
| }, |
| { |
| "name": "model.layers.15.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 29036544 |
| } |
| ], |
| "md5sum": "a53834b2ef9dd54051e60faa9b011bb3" |
| }, |
| { |
| "dataPath": "params_shard_113.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "05dbe8e0fd31a095af6cf77725c6211f" |
| }, |
| { |
| "dataPath": "params_shard_114.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b434461fb5a536892c12c876966a880d" |
| }, |
| { |
| "dataPath": "params_shard_115.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d062f301d49ceb4197b0d91abda9f0d4" |
| }, |
| { |
| "dataPath": "params_shard_116.bin", |
| "format": "raw-shard", |
| "nbytes": 31295488, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 9027584 |
| }, |
| { |
| "name": "model.layers.15.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 27080704 |
| }, |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 27097088 |
| } |
| ], |
| "md5sum": "2175f44b1f71d1c2f521128405d4955d" |
| }, |
| { |
| "dataPath": "params_shard_117.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e833c0d40b46c3a8b8f88cd7748488c6" |
| }, |
| { |
| "dataPath": "params_shard_118.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dff5db21da22c8cd46be72a7dba3a0b0" |
| }, |
| { |
| "dataPath": "params_shard_119.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "45eedc95fc961184ae3c62283301d73e" |
| }, |
| { |
| "dataPath": "params_shard_120.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.16.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.16.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.16.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "ce20fa7f4b3ea39decb962d0dc34174d" |
| }, |
| { |
| "dataPath": "params_shard_121.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "01a2f99d5ead7f16ac37307716219bfd" |
| }, |
| { |
| "dataPath": "params_shard_122.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "23870cf912886d82c0845f000dc682c4" |
| }, |
| { |
| "dataPath": "params_shard_123.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "61c54346043a438a5ca1b1690b834024" |
| }, |
| { |
| "dataPath": "params_shard_124.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "62844e4906d274d4cf32792cb6e9a374" |
| }, |
| { |
| "dataPath": "params_shard_125.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.17.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.17.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.17.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "7bbc48418119a202f25e94c350b5a226" |
| }, |
| { |
| "dataPath": "params_shard_126.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "879bbec454efaf0c7980e117dae1904c" |
| }, |
| { |
| "dataPath": "params_shard_127.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ecfc00d8f94a6ee4610444851b738e06" |
| }, |
| { |
| "dataPath": "params_shard_128.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "68e743a082d6db538a31df0fce31c491" |
| }, |
| { |
| "dataPath": "params_shard_129.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "47c47046aa040185ff51f05c1cbe22dd" |
| }, |
| { |
| "dataPath": "params_shard_130.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.18.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.18.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "7d02d8331a2a60f242a01226bc1f6012" |
| }, |
| { |
| "dataPath": "params_shard_131.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "028421c2911989c8bcf0babccbabcc02" |
| }, |
| { |
| "dataPath": "params_shard_132.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "68f68d725fbe5029c6490e6d6255ef48" |
| }, |
| { |
| "dataPath": "params_shard_133.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d4ca9cbce2416e6ae3414884680355c5" |
| }, |
| { |
| "dataPath": "params_shard_134.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6d4e81e13488194897f7e63ee9ad3fa3" |
| }, |
| { |
| "dataPath": "params_shard_135.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.19.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.19.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.19.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "84dfef56b62bac7015e59edb483c6d3f" |
| }, |
| { |
| "dataPath": "params_shard_136.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "eb75bedd3f417dec98f2a3768a0f1ddb" |
| }, |
| { |
| "dataPath": "params_shard_137.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4a3c3b3b98b38b22da201d2bc67c82f6" |
| }, |
| { |
| "dataPath": "params_shard_138.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "76defa48c02d9b15222250f2dc1c1118" |
| }, |
| { |
| "dataPath": "params_shard_139.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "26b006992ed91fb6bb2a966ea7110fd7" |
| }, |
| { |
| "dataPath": "params_shard_140.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f7fe13063b6f9f6d73e0c538cae0b941" |
| }, |
| { |
| "dataPath": "params_shard_141.bin", |
| "format": "raw-shard", |
| "nbytes": 28985344, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 21411840 |
| }, |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 25610240 |
| }, |
| { |
| "name": "model.layers.20.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 28968960 |
| } |
| ], |
| "md5sum": "b4825a02ea48dde54461166f3da075a5" |
| }, |
| { |
| "dataPath": "params_shard_142.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e013cddae05204057d9b1725c718325c" |
| }, |
| { |
| "dataPath": "params_shard_143.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "edc4a4a6c947fe6de1c218c819cfcc0a" |
| }, |
| { |
| "dataPath": "params_shard_144.bin", |
| "format": "raw-shard", |
| "nbytes": 18053120, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f9bc97ee4ba3254c6ba70e9b72425580" |
| }, |
| { |
| "dataPath": "params_shard_145.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "15f0032721a84a7f583952cf5405f30f" |
| }, |
| { |
| "dataPath": "params_shard_146.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "440a0716fa096f111d53297994272dd7" |
| }, |
| { |
| "dataPath": "params_shard_147.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "36b42a588ab18194e9c6b659c8eb5a49" |
| }, |
| { |
| "dataPath": "params_shard_148.bin", |
| "format": "raw-shard", |
| "nbytes": 25677824, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.20.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 9027584 |
| }, |
| { |
| "name": "model.layers.21.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 9043968 |
| }, |
| { |
| "name": "model.layers.21.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 9060352 |
| }, |
| { |
| "name": "model.layers.21.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 18087936 |
| }, |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 18104320 |
| }, |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 22302720 |
| }, |
| { |
| "name": "model.layers.22.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 25661440 |
| } |
| ], |
| "md5sum": "23bdcd715ff5b224a93ee45a44a3bd2e" |
| }, |
| { |
| "dataPath": "params_shard_149.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "eb8b6ca86b17c6c819283b3a5358bff6" |
| }, |
| { |
| "dataPath": "params_shard_150.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d349a755b422a67323dd02ef5165501d" |
| }, |
| { |
| "dataPath": "params_shard_151.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2ae96cb72d5e197f91ff10070591d90f" |
| }, |
| { |
| "dataPath": "params_shard_152.bin", |
| "format": "raw-shard", |
| "nbytes": 31295488, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 9027584 |
| }, |
| { |
| "name": "model.layers.22.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 27080704 |
| }, |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 27097088 |
| } |
| ], |
| "md5sum": "39842a828b971892a5f32cdd94fe1a9c" |
| }, |
| { |
| "dataPath": "params_shard_153.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b5338450344878dc44665e2249f75324" |
| }, |
| { |
| "dataPath": "params_shard_154.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9574ed594799a1de3646f9ed72ebd59c" |
| }, |
| { |
| "dataPath": "params_shard_155.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "261edd13bbeed51315b52f9e18530d24" |
| }, |
| { |
| "dataPath": "params_shard_156.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.23.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.23.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.23.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "049ff09c19c708d8f82d627d538887d5" |
| }, |
| { |
| "dataPath": "params_shard_157.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "dde0979fd9196bc3f7b77a7ae6f17930" |
| }, |
| { |
| "dataPath": "params_shard_158.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6bbe79f1518c24cee5200c54f75522b4" |
| }, |
| { |
| "dataPath": "params_shard_159.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "68d9ae9dfa0eb943f8dd5f7d03281694" |
| }, |
| { |
| "dataPath": "params_shard_160.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6019afff0aa10835f76f4a5a4172acaa" |
| }, |
| { |
| "dataPath": "params_shard_161.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.24.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.24.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.24.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "59ac88d412035f1fbd62e88f9c5e7b93" |
| }, |
| { |
| "dataPath": "params_shard_162.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.24.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "b1afe87958a3d82853ee7b6e0fe75064" |
| }, |
| { |
| "dataPath": "params_shard_163.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "af8a77850bed18a2ea9fba458bac3dc1" |
| }, |
| { |
| "dataPath": "params_shard_164.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a83090b8e79c536ec1d4c8ab443c3873" |
| }, |
| { |
| "dataPath": "params_shard_165.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6ada7e80ebdf3f84d2d0a6d97cd18d1a" |
| }, |
| { |
| "dataPath": "params_shard_166.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.24.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.25.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.25.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.25.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "6553f1975e90cd4b3d990cede97f1893" |
| }, |
| { |
| "dataPath": "params_shard_167.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.25.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "a88e77d86ec197b0b35c067b3befb9a3" |
| }, |
| { |
| "dataPath": "params_shard_168.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7c3e43ac11317922de97766d005f3a9c" |
| }, |
| { |
| "dataPath": "params_shard_169.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e72ef0375da604d812fcb6fb0a8e6032" |
| }, |
| { |
| "dataPath": "params_shard_170.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4f582b6e40179ec867cb569e2904d168" |
| }, |
| { |
| "dataPath": "params_shard_171.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.26.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.26.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.26.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "ce3165bebe60afbe168180b5b70ef8b6" |
| }, |
| { |
| "dataPath": "params_shard_172.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.26.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "a3855f08c3ed391839c84d1392cb251c" |
| }, |
| { |
| "dataPath": "params_shard_173.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a5097ef02cf60fb2896fb7fbc76ceceb" |
| }, |
| { |
| "dataPath": "params_shard_174.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ba63c5da743428a68db79857582ba921" |
| }, |
| { |
| "dataPath": "params_shard_175.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.27.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f290e64c06e38b15c616074e709a3d6b" |
| }, |
| { |
| "dataPath": "params_shard_176.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1280d25bc385d134b8b4ab101584b62f" |
| }, |
| { |
| "dataPath": "params_shard_177.bin", |
| "format": "raw-shard", |
| "nbytes": 28985344, |
| "records": [ |
| { |
| "name": "model.layers.26.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 21411840 |
| }, |
| { |
| "name": "model.layers.27.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 25610240 |
| }, |
| { |
| "name": "model.layers.27.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 28968960 |
| } |
| ], |
| "md5sum": "3031c9832ba9c0a9d90d588eb03fc249" |
| }, |
| { |
| "dataPath": "params_shard_178.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f6378dd0f40dceb8cf6a8a73f0d8e8f3" |
| }, |
| { |
| "dataPath": "params_shard_179.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "08c6bd7594831f9146eb2be057adc59d" |
| }, |
| { |
| "dataPath": "params_shard_180.bin", |
| "format": "raw-shard", |
| "nbytes": 18053120, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3d3b4247700d79962d5653789e00e145" |
| }, |
| { |
| "dataPath": "params_shard_181.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c872885a37f3fd11c165026af428866a" |
| }, |
| { |
| "dataPath": "params_shard_182.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.28.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1d1d2f645989f65d30e478ad291fcd6d" |
| }, |
| { |
| "dataPath": "params_shard_183.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dd18424966968c28619b83ac0abe3e6d" |
| }, |
| { |
| "dataPath": "params_shard_184.bin", |
| "format": "raw-shard", |
| "nbytes": 25677824, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.27.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 9027584 |
| }, |
| { |
| "name": "model.layers.28.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 9043968 |
| }, |
| { |
| "name": "model.layers.28.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 9060352 |
| }, |
| { |
| "name": "model.layers.28.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 18087936 |
| }, |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 18104320 |
| }, |
| { |
| "name": "model.layers.28.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 22302720 |
| }, |
| { |
| "name": "model.layers.29.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 25661440 |
| } |
| ], |
| "md5sum": "dd280befaf3a5684d3abd761612a7c51" |
| }, |
| { |
| "dataPath": "params_shard_185.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ca27ba6515d02eb7bf038a8e2096440a" |
| }, |
| { |
| "dataPath": "params_shard_186.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "14306488e16a967e3f1597a395b95ef9" |
| }, |
| { |
| "dataPath": "params_shard_187.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.29.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7523db6414744d3cdf751ac4cad8a300" |
| }, |
| { |
| "dataPath": "params_shard_188.bin", |
| "format": "raw-shard", |
| "nbytes": 31295488, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 9027584 |
| }, |
| { |
| "name": "model.layers.29.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 27080704 |
| }, |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 27097088 |
| } |
| ], |
| "md5sum": "c7ccb75866898422fa7dceb1359a1c10" |
| }, |
| { |
| "dataPath": "params_shard_189.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "620f94220add60cecbd30d117ac98863" |
| }, |
| { |
| "dataPath": "params_shard_190.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ec8cb0aed1a81fa455a43b2499fea3c3" |
| }, |
| { |
| "dataPath": "params_shard_191.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e89bb5ebd8ab65d4980d33d2e1edbef2" |
| }, |
| { |
| "dataPath": "params_shard_192.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.29.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.30.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.30.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.30.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.30.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "0aaa2112822e7570859696fea75a6cf4" |
| }, |
| { |
| "dataPath": "params_shard_193.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.30.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "a3debe2f1a06138931e09e232c02e90b" |
| }, |
| { |
| "dataPath": "params_shard_194.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "74fb034034e248480bca56647e282e16" |
| }, |
| { |
| "dataPath": "params_shard_195.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "06b39ce0fc34565649e398fa8439b93b" |
| }, |
| { |
| "dataPath": "params_shard_196.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "96a176541f211522f567e9b8316de470" |
| }, |
| { |
| "dataPath": "params_shard_197.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.30.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.31.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.31.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.31.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.31.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "01bbaf476ea4368900a8f57d5b140214" |
| }, |
| { |
| "dataPath": "params_shard_198.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.31.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "6fa9860f590d5b39170efec35d1233cb" |
| }, |
| { |
| "dataPath": "params_shard_199.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.32.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "85ffc86403233ab4c0c68a195f9ee9c5" |
| }, |
| { |
| "dataPath": "params_shard_200.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.32.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8c588a4e8b943738591292eb866d7696" |
| }, |
| { |
| "dataPath": "params_shard_201.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.32.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "438b4cebfd20dfd6d6ee756f4f18fde4" |
| }, |
| { |
| "dataPath": "params_shard_202.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.32.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.32.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.32.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.32.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "04078751c58b092a7e64eb68005e4e63" |
| }, |
| { |
| "dataPath": "params_shard_203.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.32.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.32.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "424491ad8730d3df18e554760541434f" |
| }, |
| { |
| "dataPath": "params_shard_204.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.33.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fc95e5f815b5a51503c8be32ce5291f5" |
| }, |
| { |
| "dataPath": "params_shard_205.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.33.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "99c8ebabc910161bdab126289a1e99d5" |
| }, |
| { |
| "dataPath": "params_shard_206.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.33.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "486b1f891c0a8420d6c9e15fcab249a1" |
| }, |
| { |
| "dataPath": "params_shard_207.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.32.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.33.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.33.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.33.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.33.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "cc0f86424e498977cc8b27feae26a842" |
| }, |
| { |
| "dataPath": "params_shard_208.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.33.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.33.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "44adeae3664e5449b939a0345f9413d7" |
| }, |
| { |
| "dataPath": "params_shard_209.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.34.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5f268de22fc4c67c2db8c5ae44c4f9b3" |
| }, |
| { |
| "dataPath": "params_shard_210.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.34.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c000330f7b109f3f5760032feec3a937" |
| }, |
| { |
| "dataPath": "params_shard_211.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.34.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d11f118a721a49f12a6fbffd73ae024e" |
| }, |
| { |
| "dataPath": "params_shard_212.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.34.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a13872a78ac3ba4ed55fc306b4e70cfe" |
| }, |
| { |
| "dataPath": "params_shard_213.bin", |
| "format": "raw-shard", |
| "nbytes": 28985344, |
| "records": [ |
| { |
| "name": "model.layers.33.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.34.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.34.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 21411840 |
| }, |
| { |
| "name": "model.layers.34.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 25610240 |
| }, |
| { |
| "name": "model.layers.34.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 28968960 |
| } |
| ], |
| "md5sum": "a74f851f2ca8cfed4c071c890e972525" |
| }, |
| { |
| "dataPath": "params_shard_214.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.35.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6ea85f58cc17e46b790ce5b3f000b0e6" |
| }, |
| { |
| "dataPath": "params_shard_215.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.35.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9f945c156e0b0c283fc7908e30e70f91" |
| }, |
| { |
| "dataPath": "params_shard_216.bin", |
| "format": "raw-shard", |
| "nbytes": 18053120, |
| "records": [ |
| { |
| "name": "model.layers.35.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9682c40005dda0f3eca8334e36ccdd70" |
| }, |
| { |
| "dataPath": "params_shard_217.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.35.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cf08a784973fe09be8b4f89157c8b6bb" |
| }, |
| { |
| "dataPath": "params_shard_218.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.35.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a75f7eaf058517fb441dfa5af425e4fd" |
| }, |
| { |
| "dataPath": "params_shard_219.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.36.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "47ac235634b2f5400fb97c6732f417fe" |
| }, |
| { |
| "dataPath": "params_shard_220.bin", |
| "format": "raw-shard", |
| "nbytes": 25677824, |
| "records": [ |
| { |
| "name": "model.layers.34.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.34.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 9027584 |
| }, |
| { |
| "name": "model.layers.35.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 9043968 |
| }, |
| { |
| "name": "model.layers.35.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 9060352 |
| }, |
| { |
| "name": "model.layers.35.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 18087936 |
| }, |
| { |
| "name": "model.layers.35.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 18104320 |
| }, |
| { |
| "name": "model.layers.35.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 22302720 |
| }, |
| { |
| "name": "model.layers.36.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 25661440 |
| } |
| ], |
| "md5sum": "ec39c462970ebe4ee10f5ee79576a79e" |
| }, |
| { |
| "dataPath": "params_shard_221.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.36.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5868698d5c96fa28c20d7d199088bf83" |
| }, |
| { |
| "dataPath": "params_shard_222.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.36.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c7f5d8c69cd3f55d6ac30517c51f3ae2" |
| }, |
| { |
| "dataPath": "params_shard_223.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.36.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "29e2bbbfedd140d4948e6910bb014354" |
| }, |
| { |
| "dataPath": "params_shard_224.bin", |
| "format": "raw-shard", |
| "nbytes": 31295488, |
| "records": [ |
| { |
| "name": "model.layers.36.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.36.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 9027584 |
| }, |
| { |
| "name": "model.layers.36.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 27080704 |
| }, |
| { |
| "name": "model.layers.36.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 27097088 |
| } |
| ], |
| "md5sum": "f472c32a1adb655faefc09e220241442" |
| }, |
| { |
| "dataPath": "params_shard_225.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.37.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f3cff3bc3bc2ae4ba5afee4202b481d8" |
| }, |
| { |
| "dataPath": "params_shard_226.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.37.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7d79d7c8a9e50fedb64aa84e4b2cf588" |
| }, |
| { |
| "dataPath": "params_shard_227.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.37.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c4242f169324f511a0373d752457f01b" |
| }, |
| { |
| "dataPath": "params_shard_228.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.36.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.37.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.37.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.37.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.37.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "7a78eb6ac0e86c2e5a77a72f30d6d7f0" |
| }, |
| { |
| "dataPath": "params_shard_229.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.37.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.37.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "5229dca6884f6654bc9e3abd7feded3f" |
| }, |
| { |
| "dataPath": "params_shard_230.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.38.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3131da72b52111f394cdf2d1562aa625" |
| }, |
| { |
| "dataPath": "params_shard_231.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.38.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "92e5530162399b4ad126dfce6c1c2bb6" |
| }, |
| { |
| "dataPath": "params_shard_232.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.38.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3ac36c3308c67f324c2fc9487c62b728" |
| }, |
| { |
| "dataPath": "params_shard_233.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.37.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.38.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.38.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.38.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.38.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "d789488f4bb9eb6926f92ce6ae95c4c5" |
| }, |
| { |
| "dataPath": "params_shard_234.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.38.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.38.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "4853f8071d659590ef357dd6782c7c09" |
| }, |
| { |
| "dataPath": "params_shard_235.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.39.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7e288cb4dbdd57736df289cbd57cb42d" |
| }, |
| { |
| "dataPath": "params_shard_236.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.39.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "efb8406b8410c9d74f06349e635b8258" |
| }, |
| { |
| "dataPath": "params_shard_237.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.39.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7f730617c8eae5741b25b4f6d97941f8" |
| }, |
| { |
| "dataPath": "params_shard_238.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.38.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.39.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.39.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.39.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.39.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "6c2ac9903312580c493c566eefc07df3" |
| }, |
| { |
| "dataPath": "params_shard_239.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.39.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.39.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "49e9fc7c8cf3b161b660d28dafc1884a" |
| }, |
| { |
| "dataPath": "params_shard_240.bin", |
| "format": "raw-shard", |
| "nbytes": 72220672, |
| "records": [ |
| { |
| "name": "model.layers.40.mlp.down_proj.q_weight", |
| "shape": [ |
| 8192, |
| 2204 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 72220672, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4702d085f2757cc773efd691652d3852" |
| }, |
| { |
| "dataPath": "params_shard_241.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.40.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f6126c71aee4ccfc72eaae805e1d037c" |
| }, |
| { |
| "dataPath": "params_shard_242.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.40.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cfe65db297c32a7a174d8f9897cc8c0b" |
| }, |
| { |
| "dataPath": "params_shard_243.bin", |
| "format": "raw-shard", |
| "nbytes": 30472192, |
| "records": [ |
| { |
| "name": "model.layers.39.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.40.input_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.40.mlp.down_proj.q_scale", |
| "shape": [ |
| 8192, |
| 551 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9027584, |
| "byteOffset": 3375104 |
| }, |
| { |
| "name": "model.layers.40.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 12402688 |
| }, |
| { |
| "name": "model.layers.40.post_attention_layernorm.weight", |
| "shape": [ |
| 8192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 16384, |
| "byteOffset": 30455808 |
| } |
| ], |
| "md5sum": "3170d9dc69b4fc226ee26f6c0d027ac8" |
| }, |
| { |
| "dataPath": "params_shard_244.bin", |
| "format": "raw-shard", |
| "nbytes": 31068160, |
| "records": [ |
| { |
| "name": "model.layers.40.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.40.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 4198400 |
| } |
| ], |
| "md5sum": "2cbfb23026c0d87cce43b03319fb33a9" |
| }, |
| { |
| "dataPath": "params_shard_245.bin", |
| "format": "raw-shard", |
| "nbytes": 144424960, |
| "records": [ |
| { |
| "name": "model.layers.41.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 44032, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 144424960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "090d056cf4438b4a87654a585d39f184" |
| }, |
| { |
| "dataPath": "params_shard_246.bin", |
| "format": "raw-shard", |
| "nbytes": 33587200, |
| "records": [ |
| { |
| "name": "model.layers.41.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 10240, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 33587200, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4c73af4912a8783d675d8ca2e51241e5" |
| }, |
| { |
| "dataPath": "params_shard_247.bin", |
| "format": "raw-shard", |
| "nbytes": 26869760, |
| "records": [ |
| { |
| "name": "model.layers.41.self_attn.o_proj.q_weight", |
| "shape": [ |
| 8192, |
| 820 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 26869760, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ac8a7bb01516a3c5040722f18baec4aa" |
| }, |
| { |
| "dataPath": "params_shard_248.bin", |
| "format": "raw-shard", |
| "nbytes": 28968960, |
| "records": [ |
| { |
| "name": "model.layers.40.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.41.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 44032, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18053120, |
| "byteOffset": 3358720 |
| }, |
| { |
| "name": "model.layers.41.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 10240, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4198400, |
| "byteOffset": 21411840 |
| }, |
| { |
| "name": "model.layers.41.self_attn.o_proj.q_scale", |
| "shape": [ |
| 8192, |
| 205 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3358720, |
| "byteOffset": 25610240 |
| } |
| ], |
| "md5sum": "f236c60dd4577aa5e3b3b7c9c082822a" |
| } |
| ] |
| } |