| { |
| "metadata": { |
| "ParamSize": 325, |
| "ParamBytes": 4526981120.0, |
| "BitsPerParam": 5.000992646497372 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 65536000, |
| "records": [ |
| { |
| "name": "model.embed_tokens.q_weight", |
| "shape": [ |
| 32000, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 65536000, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2b57581148c833b363b3a46155d6c7a0" |
| }, |
| { |
| "dataPath": "params_shard_1.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6ca6a9ebee27547da33fca7f9a5db8b8" |
| }, |
| { |
| "dataPath": "params_shard_2.bin", |
| "format": "raw-shard", |
| "nbytes": 31784960, |
| "records": [ |
| { |
| "name": "model.embed_tokens.q_scale", |
| "shape": [ |
| 32000, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192000, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 8192000 |
| }, |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 20774912 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 22347776 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 30736384 |
| } |
| ], |
| "md5sum": "b9ad634c3a1133ba2239edbebecbcaaf" |
| }, |
| { |
| "dataPath": "params_shard_3.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "44107b290919f876e6154c068bd661ba" |
| }, |
| { |
| "dataPath": "params_shard_4.bin", |
| "format": "raw-shard", |
| "nbytes": 25182208, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.0.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 7340032 |
| }, |
| { |
| "name": "model.layers.0.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.0.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11018240 |
| }, |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 11026432 |
| }, |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 23609344 |
| } |
| ], |
| "md5sum": "717ec17e1227154411c1a820dc047261" |
| }, |
| { |
| "dataPath": "params_shard_5.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "38c6b4269ae350b9282fa431c846ed23" |
| }, |
| { |
| "dataPath": "params_shard_6.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b7239cac09cb8273a156c003010be479" |
| }, |
| { |
| "dataPath": "params_shard_7.bin", |
| "format": "raw-shard", |
| "nbytes": 33046528, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.1.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 16777216 |
| }, |
| { |
| "name": "model.layers.1.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.1.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20455424 |
| }, |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 20463616 |
| } |
| ], |
| "md5sum": "de1f018181fbbdcd4474bd967e6bde47" |
| }, |
| { |
| "dataPath": "params_shard_8.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7d5f9af4aa13184e1d199522f522e369" |
| }, |
| { |
| "dataPath": "params_shard_9.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6687708501605032ee921bee4a2253a8" |
| }, |
| { |
| "dataPath": "params_shard_10.bin", |
| "format": "raw-shard", |
| "nbytes": 22036480, |
| "records": [ |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 1572864 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 9961472 |
| }, |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.2.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 18350080 |
| }, |
| { |
| "name": "model.layers.2.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22020096 |
| }, |
| { |
| "name": "model.layers.2.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22028288 |
| } |
| ], |
| "md5sum": "f484c7d54447e8fa3a60554b31576b39" |
| }, |
| { |
| "dataPath": "params_shard_11.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4255e2d37ee1260ea398fdb377da60fc" |
| }, |
| { |
| "dataPath": "params_shard_12.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dacfa3f4e70f731902865d8bd771e2ad" |
| }, |
| { |
| "dataPath": "params_shard_13.bin", |
| "format": "raw-shard", |
| "nbytes": 30932992, |
| "records": [ |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 22544384 |
| }, |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 23592960 |
| } |
| ], |
| "md5sum": "6bfed2448f571e67ffc4fb0ea22d7ec4" |
| }, |
| { |
| "dataPath": "params_shard_14.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bb5427d504bac40c1d589078f9764a05" |
| }, |
| { |
| "dataPath": "params_shard_15.bin", |
| "format": "raw-shard", |
| "nbytes": 27279360, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.3.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3670016 |
| }, |
| { |
| "name": "model.layers.3.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3678208 |
| }, |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 3686400 |
| }, |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 16269312 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 17842176 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 26230784 |
| } |
| ], |
| "md5sum": "6ea7d8604ec7fd9d5ed88ac8c72a8b85" |
| }, |
| { |
| "dataPath": "params_shard_16.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c01d31ab5e41daccd65013bbb2142aa8" |
| }, |
| { |
| "dataPath": "params_shard_17.bin", |
| "format": "raw-shard", |
| "nbytes": 25182208, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.4.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 7340032 |
| }, |
| { |
| "name": "model.layers.4.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.4.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11018240 |
| }, |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 11026432 |
| }, |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 23609344 |
| } |
| ], |
| "md5sum": "b56702e3404bdc2aafa82160c94f47ff" |
| }, |
| { |
| "dataPath": "params_shard_18.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "83520f6b4fa3b4348fc151aada5a9f50" |
| }, |
| { |
| "dataPath": "params_shard_19.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8aa79d26e4b00dbec3c96452f6f30adf" |
| }, |
| { |
| "dataPath": "params_shard_20.bin", |
| "format": "raw-shard", |
| "nbytes": 33046528, |
| "records": [ |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.5.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 16777216 |
| }, |
| { |
| "name": "model.layers.5.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.5.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20455424 |
| }, |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 20463616 |
| } |
| ], |
| "md5sum": "0b42566d825abc55ea0ee8dd55060446" |
| }, |
| { |
| "dataPath": "params_shard_21.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "419b16bd7d0d37291266b7f48fa5dcb6" |
| }, |
| { |
| "dataPath": "params_shard_22.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b070ae27e587e4a482d3411535572b70" |
| }, |
| { |
| "dataPath": "params_shard_23.bin", |
| "format": "raw-shard", |
| "nbytes": 22036480, |
| "records": [ |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 1572864 |
| }, |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 9961472 |
| }, |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.6.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 18350080 |
| }, |
| { |
| "name": "model.layers.6.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22020096 |
| }, |
| { |
| "name": "model.layers.6.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22028288 |
| } |
| ], |
| "md5sum": "5b0d59357e5021db580dff05fe7814bb" |
| }, |
| { |
| "dataPath": "params_shard_24.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "86689d860fbb0e16097a7e1765f2c36e" |
| }, |
| { |
| "dataPath": "params_shard_25.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e0b09ac9c8fa7c38bb0654c860b221bf" |
| }, |
| { |
| "dataPath": "params_shard_26.bin", |
| "format": "raw-shard", |
| "nbytes": 30932992, |
| "records": [ |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 22544384 |
| }, |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 23592960 |
| } |
| ], |
| "md5sum": "7373b71e9a1a0b89d25dcd6e1d0ebab3" |
| }, |
| { |
| "dataPath": "params_shard_27.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ba4811273bd1f67f87acce4123c0a4d9" |
| }, |
| { |
| "dataPath": "params_shard_28.bin", |
| "format": "raw-shard", |
| "nbytes": 27279360, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.7.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3670016 |
| }, |
| { |
| "name": "model.layers.7.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3678208 |
| }, |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 3686400 |
| }, |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 16269312 |
| }, |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 17842176 |
| }, |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 26230784 |
| } |
| ], |
| "md5sum": "0c3cd1a038246ac969e16fd27fbee63e" |
| }, |
| { |
| "dataPath": "params_shard_29.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ac3e5c45718c63c58b1ff2b468511930" |
| }, |
| { |
| "dataPath": "params_shard_30.bin", |
| "format": "raw-shard", |
| "nbytes": 25182208, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.8.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 7340032 |
| }, |
| { |
| "name": "model.layers.8.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.8.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11018240 |
| }, |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 11026432 |
| }, |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 23609344 |
| } |
| ], |
| "md5sum": "4a92b0010f7779fe8146c7e119ad1e38" |
| }, |
| { |
| "dataPath": "params_shard_31.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "aa25f9417b4b647b4dd585481684096d" |
| }, |
| { |
| "dataPath": "params_shard_32.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "644979ebe6bc7ddad8e041fd6643c64f" |
| }, |
| { |
| "dataPath": "params_shard_33.bin", |
| "format": "raw-shard", |
| "nbytes": 33046528, |
| "records": [ |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.9.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 16777216 |
| }, |
| { |
| "name": "model.layers.9.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.9.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20455424 |
| }, |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 20463616 |
| } |
| ], |
| "md5sum": "3ddc1e7858e2835bae793e1b398af8b8" |
| }, |
| { |
| "dataPath": "params_shard_34.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7eaaf6173a0875d16f06151daf42b60e" |
| }, |
| { |
| "dataPath": "params_shard_35.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "77fb7d5486f709c3d1cca18426db011b" |
| }, |
| { |
| "dataPath": "params_shard_36.bin", |
| "format": "raw-shard", |
| "nbytes": 22036480, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 1572864 |
| }, |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 9961472 |
| }, |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.10.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 18350080 |
| }, |
| { |
| "name": "model.layers.10.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22020096 |
| }, |
| { |
| "name": "model.layers.10.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22028288 |
| } |
| ], |
| "md5sum": "0d4346ce38f66e41d89d7ed4784833e6" |
| }, |
| { |
| "dataPath": "params_shard_37.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "088efa948932204d94d80b7a6875cfcc" |
| }, |
| { |
| "dataPath": "params_shard_38.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "85aa88ee63924ccc34f955bdecc749cc" |
| }, |
| { |
| "dataPath": "params_shard_39.bin", |
| "format": "raw-shard", |
| "nbytes": 30932992, |
| "records": [ |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 22544384 |
| }, |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 23592960 |
| } |
| ], |
| "md5sum": "31f7733650c0a63d200a9c940be44555" |
| }, |
| { |
| "dataPath": "params_shard_40.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b32e72d14b38e6ceb04698c3ad891d01" |
| }, |
| { |
| "dataPath": "params_shard_41.bin", |
| "format": "raw-shard", |
| "nbytes": 27279360, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.11.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3670016 |
| }, |
| { |
| "name": "model.layers.11.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3678208 |
| }, |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 3686400 |
| }, |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 16269312 |
| }, |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 17842176 |
| }, |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 26230784 |
| } |
| ], |
| "md5sum": "175c665ccd16fe72caf13df0f77391ed" |
| }, |
| { |
| "dataPath": "params_shard_42.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "076b3e469d8d6ea5e6a58ca061429ff0" |
| }, |
| { |
| "dataPath": "params_shard_43.bin", |
| "format": "raw-shard", |
| "nbytes": 25182208, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.12.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 7340032 |
| }, |
| { |
| "name": "model.layers.12.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.12.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11018240 |
| }, |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 11026432 |
| }, |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 23609344 |
| } |
| ], |
| "md5sum": "d3ff4b7e1f19f50fc687bbeae49715b2" |
| }, |
| { |
| "dataPath": "params_shard_44.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "82060f5401635818d122cddc5b5c185d" |
| }, |
| { |
| "dataPath": "params_shard_45.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "33ca18a531504a44eb61575d013da59d" |
| }, |
| { |
| "dataPath": "params_shard_46.bin", |
| "format": "raw-shard", |
| "nbytes": 33046528, |
| "records": [ |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.13.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 16777216 |
| }, |
| { |
| "name": "model.layers.13.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.13.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20455424 |
| }, |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 20463616 |
| } |
| ], |
| "md5sum": "12539be0707daf80af062fe537e0f4f6" |
| }, |
| { |
| "dataPath": "params_shard_47.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "20a90cec41929343164315d90554e302" |
| }, |
| { |
| "dataPath": "params_shard_48.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "030432ae84253006e40b4fb977074b31" |
| }, |
| { |
| "dataPath": "params_shard_49.bin", |
| "format": "raw-shard", |
| "nbytes": 22036480, |
| "records": [ |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 1572864 |
| }, |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 9961472 |
| }, |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.14.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 18350080 |
| }, |
| { |
| "name": "model.layers.14.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22020096 |
| }, |
| { |
| "name": "model.layers.14.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22028288 |
| } |
| ], |
| "md5sum": "50bc5999a488a9ad297f82a1486a8f4e" |
| }, |
| { |
| "dataPath": "params_shard_50.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6ea07ab5a38db124863e509b07acd32d" |
| }, |
| { |
| "dataPath": "params_shard_51.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "626c8b2a505460b91b5c4b58fc9b7875" |
| }, |
| { |
| "dataPath": "params_shard_52.bin", |
| "format": "raw-shard", |
| "nbytes": 30932992, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 22544384 |
| }, |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 23592960 |
| } |
| ], |
| "md5sum": "88700075416df65b850ba48c131725b4" |
| }, |
| { |
| "dataPath": "params_shard_53.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "842a56a0f7e583ae89e23cb4d099a7c0" |
| }, |
| { |
| "dataPath": "params_shard_54.bin", |
| "format": "raw-shard", |
| "nbytes": 27279360, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.15.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3670016 |
| }, |
| { |
| "name": "model.layers.15.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3678208 |
| }, |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 3686400 |
| }, |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 16269312 |
| }, |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 17842176 |
| }, |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 26230784 |
| } |
| ], |
| "md5sum": "ad2bd38575599ee2a36f279b6225f2fa" |
| }, |
| { |
| "dataPath": "params_shard_55.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "68cab40b14caeb9255c28b704803c5e6" |
| }, |
| { |
| "dataPath": "params_shard_56.bin", |
| "format": "raw-shard", |
| "nbytes": 25182208, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.16.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 7340032 |
| }, |
| { |
| "name": "model.layers.16.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.16.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11018240 |
| }, |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 11026432 |
| }, |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 23609344 |
| } |
| ], |
| "md5sum": "d84be5520aee019e832d40fcf36822a2" |
| }, |
| { |
| "dataPath": "params_shard_57.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5511bec942d3afdd65936d0da857722f" |
| }, |
| { |
| "dataPath": "params_shard_58.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "83f667fd608df6263e5ed7473a6faa23" |
| }, |
| { |
| "dataPath": "params_shard_59.bin", |
| "format": "raw-shard", |
| "nbytes": 33046528, |
| "records": [ |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.17.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 16777216 |
| }, |
| { |
| "name": "model.layers.17.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.17.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20455424 |
| }, |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 20463616 |
| } |
| ], |
| "md5sum": "589bb0a1ab6cc09bdd82783882a55694" |
| }, |
| { |
| "dataPath": "params_shard_60.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "27335bb9a3b5fa4324e4ff3ad39cf993" |
| }, |
| { |
| "dataPath": "params_shard_61.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "44add29b1c56f7f02b89e6e06b07195b" |
| }, |
| { |
| "dataPath": "params_shard_62.bin", |
| "format": "raw-shard", |
| "nbytes": 22036480, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 1572864 |
| }, |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 9961472 |
| }, |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.18.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 18350080 |
| }, |
| { |
| "name": "model.layers.18.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22020096 |
| }, |
| { |
| "name": "model.layers.18.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22028288 |
| } |
| ], |
| "md5sum": "dc4afe30101e1f2325138af45e2c5292" |
| }, |
| { |
| "dataPath": "params_shard_63.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "219aba7b043ca595d412f23a6bf5a8c5" |
| }, |
| { |
| "dataPath": "params_shard_64.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cbecf041fe891ef327960cb09b1dc37b" |
| }, |
| { |
| "dataPath": "params_shard_65.bin", |
| "format": "raw-shard", |
| "nbytes": 30932992, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 22544384 |
| }, |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 23592960 |
| } |
| ], |
| "md5sum": "8d9c6d6891df20fcff00f2ee710557a1" |
| }, |
| { |
| "dataPath": "params_shard_66.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2c87245d5fc6cc7c5f49bcd9bfa02bc5" |
| }, |
| { |
| "dataPath": "params_shard_67.bin", |
| "format": "raw-shard", |
| "nbytes": 27279360, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.19.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3670016 |
| }, |
| { |
| "name": "model.layers.19.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3678208 |
| }, |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 3686400 |
| }, |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 16269312 |
| }, |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 17842176 |
| }, |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 26230784 |
| } |
| ], |
| "md5sum": "e2dec404e99446470a2054abf6d2a1b6" |
| }, |
| { |
| "dataPath": "params_shard_68.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3f66d2e950e68589fcc70bab8da254ff" |
| }, |
| { |
| "dataPath": "params_shard_69.bin", |
| "format": "raw-shard", |
| "nbytes": 25182208, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.20.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 7340032 |
| }, |
| { |
| "name": "model.layers.20.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.20.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11018240 |
| }, |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 11026432 |
| }, |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 23609344 |
| } |
| ], |
| "md5sum": "60afeee7b1ffb6a5fa5e2cae76a1e6fb" |
| }, |
| { |
| "dataPath": "params_shard_70.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bc241bf3bfcb43f07068f9c84e66c662" |
| }, |
| { |
| "dataPath": "params_shard_71.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "09d8eddef2e30603a5a53e3157b5a0ae" |
| }, |
| { |
| "dataPath": "params_shard_72.bin", |
| "format": "raw-shard", |
| "nbytes": 33046528, |
| "records": [ |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.21.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 16777216 |
| }, |
| { |
| "name": "model.layers.21.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.21.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20455424 |
| }, |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 20463616 |
| } |
| ], |
| "md5sum": "4e21374fb169864295a70e05bf77843e" |
| }, |
| { |
| "dataPath": "params_shard_73.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "798084215b2a4c8c3bc834c22c163af7" |
| }, |
| { |
| "dataPath": "params_shard_74.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6d3227175ca9bf4b5f4c7bae892fcebd" |
| }, |
| { |
| "dataPath": "params_shard_75.bin", |
| "format": "raw-shard", |
| "nbytes": 22036480, |
| "records": [ |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 1572864 |
| }, |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 9961472 |
| }, |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.22.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 18350080 |
| }, |
| { |
| "name": "model.layers.22.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22020096 |
| }, |
| { |
| "name": "model.layers.22.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22028288 |
| } |
| ], |
| "md5sum": "f3f2f2eb894ad1c498aa31c0308a2e60" |
| }, |
| { |
| "dataPath": "params_shard_76.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "06f878061a9ca1a9db307c125b2dec8a" |
| }, |
| { |
| "dataPath": "params_shard_77.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3b83323aad9509de69b17d9cefcf9eac" |
| }, |
| { |
| "dataPath": "params_shard_78.bin", |
| "format": "raw-shard", |
| "nbytes": 30932992, |
| "records": [ |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 22544384 |
| }, |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 23592960 |
| } |
| ], |
| "md5sum": "fcd03ffc2f4913ad199d5e25258af100" |
| }, |
| { |
| "dataPath": "params_shard_79.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "47073c6581c2dfe54d9c45bbc04a8d48" |
| }, |
| { |
| "dataPath": "params_shard_80.bin", |
| "format": "raw-shard", |
| "nbytes": 27279360, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.23.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3670016 |
| }, |
| { |
| "name": "model.layers.23.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3678208 |
| }, |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 3686400 |
| }, |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 16269312 |
| }, |
| { |
| "name": "model.layers.24.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 17842176 |
| }, |
| { |
| "name": "model.layers.24.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 26230784 |
| } |
| ], |
| "md5sum": "11390ad421d38afd76fec058952acd92" |
| }, |
| { |
| "dataPath": "params_shard_81.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7954727fec023ef6f9dde7af71633b6a" |
| }, |
| { |
| "dataPath": "params_shard_82.bin", |
| "format": "raw-shard", |
| "nbytes": 25182208, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.24.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 7340032 |
| }, |
| { |
| "name": "model.layers.24.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.24.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11018240 |
| }, |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 11026432 |
| }, |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 23609344 |
| } |
| ], |
| "md5sum": "289eadf9123efca05ffd0055094bc8ed" |
| }, |
| { |
| "dataPath": "params_shard_83.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "295195734f58e386adfc264052f4ff35" |
| }, |
| { |
| "dataPath": "params_shard_84.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c63f2862167a32705ea76d71dd290f0a" |
| }, |
| { |
| "dataPath": "params_shard_85.bin", |
| "format": "raw-shard", |
| "nbytes": 33046528, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.25.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.25.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 16777216 |
| }, |
| { |
| "name": "model.layers.25.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.25.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20455424 |
| }, |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 20463616 |
| } |
| ], |
| "md5sum": "f82105042c25e5b687c1ea11c7ec338b" |
| }, |
| { |
| "dataPath": "params_shard_86.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "407b6153d6a415f31ecf1869afdb7131" |
| }, |
| { |
| "dataPath": "params_shard_87.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "766d17b7a621aaebacd10f3773214514" |
| }, |
| { |
| "dataPath": "params_shard_88.bin", |
| "format": "raw-shard", |
| "nbytes": 22036480, |
| "records": [ |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.26.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 1572864 |
| }, |
| { |
| "name": "model.layers.26.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 9961472 |
| }, |
| { |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.26.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 18350080 |
| }, |
| { |
| "name": "model.layers.26.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22020096 |
| }, |
| { |
| "name": "model.layers.26.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22028288 |
| } |
| ], |
| "md5sum": "987ca7537e2e73af6ad2bc5b3a7d9449" |
| }, |
| { |
| "dataPath": "params_shard_89.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "017b5837c3992a60457a79e72cacc626" |
| }, |
| { |
| "dataPath": "params_shard_90.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f22a1a0fd6dfa905299a65eaecc03129" |
| }, |
| { |
| "dataPath": "params_shard_91.bin", |
| "format": "raw-shard", |
| "nbytes": 30932992, |
| "records": [ |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.27.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "model.layers.27.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 22544384 |
| }, |
| { |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 23592960 |
| } |
| ], |
| "md5sum": "91019f28d13461e2926f5281d5bcb71b" |
| }, |
| { |
| "dataPath": "params_shard_92.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7e5180ff86b725845b2c249c166ebe37" |
| }, |
| { |
| "dataPath": "params_shard_93.bin", |
| "format": "raw-shard", |
| "nbytes": 27279360, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.27.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3670016 |
| }, |
| { |
| "name": "model.layers.27.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3678208 |
| }, |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 3686400 |
| }, |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 16269312 |
| }, |
| { |
| "name": "model.layers.28.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 17842176 |
| }, |
| { |
| "name": "model.layers.28.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 26230784 |
| } |
| ], |
| "md5sum": "962c6b434bb9c2fa4f9188dd4de17f06" |
| }, |
| { |
| "dataPath": "params_shard_94.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2371a17cce67065e7dcd215e5efa35c1" |
| }, |
| { |
| "dataPath": "params_shard_95.bin", |
| "format": "raw-shard", |
| "nbytes": 25182208, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.28.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 7340032 |
| }, |
| { |
| "name": "model.layers.28.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.28.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 11018240 |
| }, |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 11026432 |
| }, |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 23609344 |
| } |
| ], |
| "md5sum": "049232b26fd661de4215be5883642d29" |
| }, |
| { |
| "dataPath": "params_shard_96.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d1e12289d3542e2b5717b2f2208b0a59" |
| }, |
| { |
| "dataPath": "params_shard_97.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "283339ebdfe83ed00863c24ddcb7c0cf" |
| }, |
| { |
| "dataPath": "params_shard_98.bin", |
| "format": "raw-shard", |
| "nbytes": 33046528, |
| "records": [ |
| { |
| "name": "model.layers.29.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.29.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.29.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 16777216 |
| }, |
| { |
| "name": "model.layers.29.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.29.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 20455424 |
| }, |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 20463616 |
| } |
| ], |
| "md5sum": "61edde125d206ee4068511cba24ed61f" |
| }, |
| { |
| "dataPath": "params_shard_99.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "16c3f02e527dfe7039d5c21f7b73634b" |
| }, |
| { |
| "dataPath": "params_shard_100.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "281560e4bb0abb04c74c6ef6e1edc8db" |
| }, |
| { |
| "dataPath": "params_shard_101.bin", |
| "format": "raw-shard", |
| "nbytes": 22036480, |
| "records": [ |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.30.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 1572864 |
| }, |
| { |
| "name": "model.layers.30.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 9961472 |
| }, |
| { |
| "name": "model.layers.30.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.30.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 18350080 |
| }, |
| { |
| "name": "model.layers.30.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22020096 |
| }, |
| { |
| "name": "model.layers.30.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 22028288 |
| } |
| ], |
| "md5sum": "b4b58c5e010350402e3ad9022fca7fee" |
| }, |
| { |
| "dataPath": "params_shard_102.bin", |
| "format": "raw-shard", |
| "nbytes": 58720256, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 28672, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 58720256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c7dc9bb31b7d0aecc5615c46a96dc2a8" |
| }, |
| { |
| "dataPath": "params_shard_103.bin", |
| "format": "raw-shard", |
| "nbytes": 29360128, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1792 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 29360128, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c97e4fb122eb88ace1310a156b335459" |
| }, |
| { |
| "dataPath": "params_shard_104.bin", |
| "format": "raw-shard", |
| "nbytes": 30932992, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 6144, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 6144, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.31.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "model.layers.31.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 1048576, |
| "byteOffset": 22544384 |
| }, |
| { |
| "name": "model.layers.31.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 28672, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 7340032, |
| "byteOffset": 23592960 |
| } |
| ], |
| "md5sum": "db9ade35db53aa953d2a5b9b40faf437" |
| }, |
| { |
| "dataPath": "params_shard_105.bin", |
| "format": "raw-shard", |
| "nbytes": 65536000, |
| "records": [ |
| { |
| "name": "lm_head.q_weight", |
| "shape": [ |
| 32000, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 65536000, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d0c2e9348270fd3413eecf40cc515729" |
| }, |
| { |
| "dataPath": "params_shard_106.bin", |
| "format": "raw-shard", |
| "nbytes": 11886592, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 448 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 3670016, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.31.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3670016 |
| }, |
| { |
| "name": "model.layers.31.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3678208 |
| }, |
| { |
| "name": "model.norm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192, |
| "byteOffset": 3686400 |
| }, |
| { |
| "name": "lm_head.q_scale", |
| "shape": [ |
| 32000, |
| 128 |
| ], |
| "dtype": "bfloat16", |
| "format": "raw", |
| "nbytes": 8192000, |
| "byteOffset": 3694592 |
| } |
| ], |
| "md5sum": "6a2926f35133e3f72911941f64f8330b" |
| } |
| ] |
| } |