{ "metadata": { "ParamSize": 325, "ParamBytes": 4517404672.0, "BitsPerParam": 4.500381277757404 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262668288, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262668288, "byteOffset": 0 } ], "md5sum": "adfe87a428378917c40b13991ec85ec2" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "301c721ed29ba40fc06891b32c30922f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32841728, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32833536, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32833536 } ], "md5sum": "12d4ba3cf62d3a787d84173266079bde" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 262668288, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262668288, "byteOffset": 0 } ], "md5sum": "ba53bcbc5a7ad9bdf66a93b23571dfe2" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 32833536, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32833536, "byteOffset": 0 } ], "md5sum": "ec1f5c954a5962ff7a038ec0f8e110e2" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33054720, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3694592 } ], "md5sum": "b09cb747aededd5fc1531963865285ab" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c7e585cda831eaabb0fd9a8853f30d01" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "08e41e13a407767874806e03d6a5dec5" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f471fe6e9755ac0d06c7390e1f6c58d5" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "559330eda52d3b17e586b2e53419225b" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "dc501062a1a04991ba92d77bd1d5c08c" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c17cb985e2aa1113ceeed2468eda50d1" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6f42359c80b381ea792750f4e94c2890" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "0a14bb6c5865c347365f204ed9910276" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0d5135a6eaa265214fb6562e7530f790" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "371c6da845d94b08f2fe2b2dca74d98e" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "83b5ff69a436ff8c66975a416d25bdd2" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cb3d968f42459a344be229c6e4c1028e" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "1510c5d85968671e5875353382b16575" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "367ab517e5d24869dd89a73dcdeaac02" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "cdede0215075e0d894d104d6f2ed6ce6" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "78e1ae43a564ff6898e53319bd46bb60" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2870c49eace338897aeba173457582d2" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "6115ab3aff493b085661e28faa5e6148" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "70ffdebab266c25bc0323a66b33920c2" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cf80fd84b4f13d5a27020925ef3f3e91" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "046196123c320e048ca78f4d0486bd01" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5a7afd9c3f3765c7bb796adfff06d8e0" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "dc7ce278d8836e113a70ebca4964eb9b" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "4edf4a1081d4a8c8d6098dccac21623c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0807894887d378d34984ecd9c8971213" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "866b5a77c113a5369553cf4e8c518bc5" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a5cc4a701e70582aae417c35153df8a1" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "63c696ec4811ec6f7373829f8d3a789f" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bb81878692e76190e494ef08443ad97d" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7a87fe2bab6be06e72c397f5412e5845" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "857aa112acac161f3ed925f14066f4e9" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "369d7a9a86da6e0c2f468be4eedac19d" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "36d6c464d22cb6a24e2532fc223b648d" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "f914c0c124ff8cdc10495f6cdc5740b3" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0ab9b35fe9148418fa5ab36cabd4722e" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "60022caa8d6f116c783e776cb92b93e1" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "e46a7cd77481fd9910827a550f11e4b0" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "39d114d36962373c35811eb2c3e921bd" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "81d0327cffa817fb278960d9d762fbe2" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "8bcb48b132ff2a2cca7ad5d751b950d8" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "87ca08ecf4f4380e251dea449b263742" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "09c8e61a722cf7560e32da9c92bd8c49" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "095236c384684fdd0ab18388224d4c17" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "1f317762ba3a2acaff4d730e540714d9" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7ca6b874093f8b0e7396a8093fb4664b" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "04e5a65f2d6c1f5fa24ba98fbd6226b7" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "7e5b384553fd22ce8f9bb1414880e4c2" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f97f544aad980cbe647fa87ae25a09c4" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b563663e6fc7bf34277be5b4dca51c72" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "fe53312c8f07e9ff89627b704b01c574" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b4b4162d84001b414ca80673d1f7fb7d" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "ec2f98c26c7d071bde06ca670dc3a6d0" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "5457c2c4f0002f03c48b1198f183bda8" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "c6a577ecebc2d9a0dbb91dc4190f78f1" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0a171bb41986b3e19349964878cbd5b2" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a291d2751562d8b3a56aae4775456c22" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "42819f48f6b94c83824fdfa529003697" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d16420c44be62e9374f997e23c190929" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f9ee7beeb041ce1bb13c9be22524669c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "2c841b9d790eec0d2cf0315956936af9" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b3b32c45aa461668b6ce32c3dc26fc9e" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 30932992, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 23592960 } ], "md5sum": "1b53dc1df1b9c7e7f98b63319e81b580" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3d243b9297ca3ced83424f19da9c906a" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2befac37492cc97f2ac27d97b5fadbfa" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "46417af071d40f518b6cf54b0fa7a618" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "40914a0e3ff98369b9af2fdc5d7124e8" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "86100dd5a734e4c9866c0044e3065527" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3686400 } ], "md5sum": "6997037062dfc00742649d760e95662a" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "96607bb0a3fb4866cd20951c68deee5a" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "ee9ab1710780b9c48197747494961541" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "20524fd82b02146bb2e531e01c7109e5" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "22594f9bfa453a22b677c28907930b90" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "ce582f6198f3e315128216c30e0673cd" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f7ad8ee36d0ffe9e98b142acdbbee413" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9f9809b90cd6375601ad9b534237a71a" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "37ce22388b5b1b2cbc4b665eb43455c4" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9c16f8e998a02492b85ca202dd5297d8" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d3b8672b676648c4b7ef2db040427a72" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "86a8870c5d42a08185b920d4eb3a76c9" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5ea7d60c69beee98cb6f92c868ed2323" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "59bf5be2b778268a6b06332f1280f393" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7f79903dce6cd047d55b2f95bcc2a0a8" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "57ef3f50a09da93b5ff47e2ba69a8475" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "71b4dc8b594490a2a01c365f057ebc01" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c16f2f2632b07547dfa24ed857c5b91c" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "70b5e27683d25f3557dc3d725381ce39" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cb9473b603671ecc8f751f6d2365d690" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d4940c498a8c6ffc766c9c74c66b3bec" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "ed32eb3c8f3ea960ab896c71e80b8f4f" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "264c6abb3f313f8c35bcc1f645874247" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4a58e29d9d808c81d03d3dbcfdfb045c" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "8c6ebcbc0c1edb82ae38834f3d8ab857" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b5d0ad64dd22f1bd25e4d715657998a0" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "c54e8b92ec26f7e95adaafeeaebca69d" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7bcbf6a51b09e9921db3ebb6f7e102ef" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "a378ea29de16509b144c46a7a612d48b" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8d9148fa170a4e45500236c78f715992" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "660c1153b28ba3b74a41c4b4f4578026" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "62a89509dcbb71a0414eb294f7ae59fc" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "5fb895b67f51aa42351d69bc644e5628" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 32505856, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11010048 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 18350080 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 30932992 } ], "md5sum": "47ee113afc6d40909e76f51b73a8cc89" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 9437184, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 } ], "md5sum": "af1ad7660baa5e1b279f1b85da305a14" } ] }