Bielik-7B-Instruct-v0.1-q4f32_1-MLC / ndarray-cache-b16.json
onceuponai's picture
Upload 73 files
9d245e3 verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 4526981120.0,
"BitsPerParam": 5.000992646497372
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "003316f3b47341bc3e79f7869f4a4ab9"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5bacc2b4e2d982b3a0cb98db929ef0df"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fcb1efe9f3b161eeaa54273f99d7fde3"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "9db6fa7cf9f448e8574c945bfafbeebe"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8784bc4594a15600bbcb17eaab8f0a84"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 30244864,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192000,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 8192000
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 8200192
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 11870208
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 19210240
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 19218432
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 19226624
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 22896640
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30236672
}
],
"md5sum": "bbc2651bcedd21e4b4095e5eeb0581c1"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "aceeb5f633216e6737723dcd0dd4b3f5"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3033b517a1a85b7a5300a770051abf5c"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "6944ccb7ddc338ae9b05e23fa43755d9"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0bb14b121d5b1300518050d1e109d690"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "4498a8c114f952d9d4cbb9a8c52570b8"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "168f8789848f3fde0d5fe7f178294b18"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "e4a20c8d5798f9e6f945b573edb1c7cc"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1ca8306112db7de07d3e82a31e6a386a"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "500cba14d9a11c3884a37f5cb91bb466"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "a74356664d3620e65e33e53224591fdd"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a84fb1c513ec429df785c54d72d776d7"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "947e3151de1afd36869ecb3c78eee4aa"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "d1205a0edea3162bf123f2c31c7ab033"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6de4a6db1e634362617f004db60da45f"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2d60e934d62482d9b01ba9e8a5d3c540"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "4853fb72eb5fd785be2d938c44b8c41f"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "fa0417e8fed075b4939e2c99c5efdaf4"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "28da2f816fdab9a43c3761097b3b2fa9"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "66df09e85d04dfb6dfeeb27d4ccee0cd"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "ae062838892632df9ecb9c01e821cee5"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "92bb91c9fbb98df8be037d990d2c4bce"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b60242b9e9303c5aa69e9e9611c5b34e"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "057526fab8236a021089e564d34fcda7"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "61408872420e5f8c6a30ad5dc8daa76f"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4906c7827e7f183b991bb0961273c48f"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "24a4a2e9eccbcd75f6e4be81c8f33607"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "65852c20b729803a23c8f428fe968b85"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "cc43fc6d2615bad486deac9336623105"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 31801344,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192000,
"byteOffset": 23601152
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 31793152
}
],
"md5sum": "64655547f9fc8f56c4f28c135187061f"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "36e02e13900ecc6130bcde91ce2fc396"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "f9254608fc059925b55f7d8f1204560f"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a50870ee0da31b4336c2a3e42f8d03e8"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3a1b66bfc81bdc7ddb73b48f279085ab"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "e0174bf748e548b38166ff43324e88c5"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2e277cf06d2b7470e118026e1991948a"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 32505856,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 18350080
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 30932992
}
],
"md5sum": "80663bcfb21a088edd59f26aecca75ae"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "af4e2ea073fc3e0927158ac690ff821c"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a1aed73e77bd5ef3039ee784c00730f7"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "04b245b189f2ae464cb827fcce09637b"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "376b4e76c6e846dd4f9e3301192a8e57"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "84f4c0a7e9c68440fbbd51d22a71d3f6"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "b52b2f75469d8abd2af997bf6c51cb06"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1838e312be27dfa05025de943be02b19"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "490e57593c593f5319636c1ed9e84077"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "2206827424886d2f3f64368c6da9c9b7"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "9397498690f490b58683f054963dc496"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "9da6e626552d556201bdb9d930b5da7f"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "173bbb74d222a42b949b77fa4adf0867"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "eb2533ac40dc4d26e64afb0de45a2af5"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "eec0ac41051847fcae240b6f127750a9"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f5dc3d21ea1adbb522b7e0ff5b4f7390"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "645a570f73745e2d952a6be7f6de0daf"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e7e38cc9ca3b8224c2ea0437b09bf8f5"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b5c05e98fb23234d4fa19672aac06782"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "178c45882109839d800a34dbdba50c13"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "087c2bcb7be3bfdfefc7d9a408ae78e9"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "359b02b147f5b8ce20c2234c558c95f7"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "b6607405c8a298d825374aab0e3ead08"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "816f5a18b13bc54202fe0143f087a056"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "b1abf5957e6d0f0e57afb504bbca108d"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f912ad775e6c7017bceedd6d700f847b"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "adeb7e14f19528d97fd6ee12f0e862ff"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ae545d39cf85e587e413a4117c09bea0"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3e1cfa465028b91969757100d466e71d"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "02721cdb3446de25b1f2668fa8278695"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 13115392
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 13123584
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 13131776
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 16801792
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 24141824
}
],
"md5sum": "8a564c25e266ad3e959496a0bd7179e5"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7056c630c41e679615dddbd7cedfdd8d"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9a5c5902438daf33dcf8f51120c99a6d"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "883d4f4ba902ad292111cfa6cfb3d633"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "80e883337d6e3a2720d97ec2b77c541b"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "b397eea576f4ff453c3b12e5004e18a9"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e912d62ec9c622fd0e194fb403607043"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "418214e8a63107fef3ca76658da60427"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d1de93aaec9ad2278c6765f47366b9e8"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "18afd7e64ace5ea245b3e8ed7b386816"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "95559a73853306d29bd172752d9a9606"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "64174c314fa1fe7182d55459f293ad5c"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "24501dc999f48552639a27ec3e892884"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "cdd37ded1d4e993108acb36dbffedc8b"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8da2e0a59e8982773415653ff5ee5a5f"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e42371f9f3c3e8d641fa594fd7fc9938"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "b514275416138c59bcfe3a04db452d71"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b24f0d2c865b55f87ee12aee685395b2"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "6ef25c9a6d8e2018465d34e6a41c68cf"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fd03f3b6643c9a6bfaa2d5ba35ba4b35"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "c1ec3e20ce56fd0571ece4422c87818d"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2e222bd53998012e0f85705d7a199b37"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4104647f4a5f2871f09aa0ddfe9e6f38"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "755c3b0a0c21dc1eee4fdf64ee4617ff"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1f651f605677e9a3288aaff636fcc969"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3efdd8783cdc288383c9d6ef258f5770"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "d98507a311f238f914e5885677e057bc"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "cb9305cea69671e5bdc4d2973613aa46"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3e84373f7f793a742dc4a9eeb158859b"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "35d4726cb8404ca578a1cf78dd35fd29"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d6159e3e1e170e2ecb9f783df6826f65"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "7b66451b5793de58a2375f228fea2b4e"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "cae5cb582a74d484cb6a0a26311fd683"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "2933ffe9b82e2b19a22aecbf15d27bab"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 9437184
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 22020096
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 23592960
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 31981568
}
],
"md5sum": "ecc032bc2912800acaab0d242df0cde8"
}
]
}