diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4383 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 4517404672.0, + "BitsPerParam": 4.500381277757404 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262668288, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 128256, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262668288, + "byteOffset": 0 + } + ], + "md5sum": "adfe87a428378917c40b13991ec85ec2" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "301c721ed29ba40fc06891b32c30922f" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 32841728, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128256, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32833536, + "byteOffset": 0 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32833536 + } + ], + "md5sum": "12d4ba3cf62d3a787d84173266079bde" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 262668288, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262668288, + "byteOffset": 0 + } + ], + "md5sum": "ba53bcbc5a7ad9bdf66a93b23571dfe2" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 32833536, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32833536, + "byteOffset": 0 + } + ], + "md5sum": "ec1f5c954a5962ff7a038ec0f8e110e2" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33054720, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 3694592 + } + ], + "md5sum": "b09cb747aededd5fc1531963865285ab" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c7e585cda831eaabb0fd9a8853f30d01" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "08e41e13a407767874806e03d6a5dec5" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f471fe6e9755ac0d06c7390e1f6c58d5" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "559330eda52d3b17e586b2e53419225b" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "dc501062a1a04991ba92d77bd1d5c08c" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c17cb985e2aa1113ceeed2468eda50d1" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "6f42359c80b381ea792750f4e94c2890" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "0a14bb6c5865c347365f204ed9910276" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0d5135a6eaa265214fb6562e7530f790" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "371c6da845d94b08f2fe2b2dca74d98e" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "83b5ff69a436ff8c66975a416d25bdd2" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cb3d968f42459a344be229c6e4c1028e" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "1510c5d85968671e5875353382b16575" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "367ab517e5d24869dd89a73dcdeaac02" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "cdede0215075e0d894d104d6f2ed6ce6" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "78e1ae43a564ff6898e53319bd46bb60" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "2870c49eace338897aeba173457582d2" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "6115ab3aff493b085661e28faa5e6148" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "70ffdebab266c25bc0323a66b33920c2" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cf80fd84b4f13d5a27020925ef3f3e91" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "046196123c320e048ca78f4d0486bd01" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5a7afd9c3f3765c7bb796adfff06d8e0" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "dc7ce278d8836e113a70ebca4964eb9b" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "4edf4a1081d4a8c8d6098dccac21623c" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0807894887d378d34984ecd9c8971213" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "866b5a77c113a5369553cf4e8c518bc5" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "a5cc4a701e70582aae417c35153df8a1" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "63c696ec4811ec6f7373829f8d3a789f" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "bb81878692e76190e494ef08443ad97d" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "7a87fe2bab6be06e72c397f5412e5845" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "857aa112acac161f3ed925f14066f4e9" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "369d7a9a86da6e0c2f468be4eedac19d" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "36d6c464d22cb6a24e2532fc223b648d" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "f914c0c124ff8cdc10495f6cdc5740b3" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0ab9b35fe9148418fa5ab36cabd4722e" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "60022caa8d6f116c783e776cb92b93e1" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "e46a7cd77481fd9910827a550f11e4b0" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "39d114d36962373c35811eb2c3e921bd" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "81d0327cffa817fb278960d9d762fbe2" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "8bcb48b132ff2a2cca7ad5d751b950d8" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "87ca08ecf4f4380e251dea449b263742" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "09c8e61a722cf7560e32da9c92bd8c49" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "095236c384684fdd0ab18388224d4c17" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "1f317762ba3a2acaff4d730e540714d9" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7ca6b874093f8b0e7396a8093fb4664b" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "04e5a65f2d6c1f5fa24ba98fbd6226b7" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "7e5b384553fd22ce8f9bb1414880e4c2" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f97f544aad980cbe647fa87ae25a09c4" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "b563663e6fc7bf34277be5b4dca51c72" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "fe53312c8f07e9ff89627b704b01c574" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b4b4162d84001b414ca80673d1f7fb7d" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "ec2f98c26c7d071bde06ca670dc3a6d0" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5457c2c4f0002f03c48b1198f183bda8" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "c6a577ecebc2d9a0dbb91dc4190f78f1" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0a171bb41986b3e19349964878cbd5b2" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "a291d2751562d8b3a56aae4775456c22" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "42819f48f6b94c83824fdfa529003697" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d16420c44be62e9374f997e23c190929" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "f9ee7beeb041ce1bb13c9be22524669c" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "2c841b9d790eec0d2cf0315956936af9" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "b3b32c45aa461668b6ce32c3dc26fc9e" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 30932992, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 23592960 + } + ], + "md5sum": "1b53dc1df1b9c7e7f98b63319e81b580" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3d243b9297ca3ced83424f19da9c906a" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "2befac37492cc97f2ac27d97b5fadbfa" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "46417af071d40f518b6cf54b0fa7a618" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "40914a0e3ff98369b9af2fdc5d7124e8" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "86100dd5a734e4c9866c0044e3065527" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 3686400 + } + ], + "md5sum": "6997037062dfc00742649d760e95662a" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "96607bb0a3fb4866cd20951c68deee5a" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "ee9ab1710780b9c48197747494961541" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "20524fd82b02146bb2e531e01c7109e5" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "22594f9bfa453a22b677c28907930b90" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "ce582f6198f3e315128216c30e0673cd" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f7ad8ee36d0ffe9e98b142acdbbee413" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9f9809b90cd6375601ad9b534237a71a" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "37ce22388b5b1b2cbc4b665eb43455c4" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9c16f8e998a02492b85ca202dd5297d8" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d3b8672b676648c4b7ef2db040427a72" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "86a8870c5d42a08185b920d4eb3a76c9" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5ea7d60c69beee98cb6f92c868ed2323" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "59bf5be2b778268a6b06332f1280f393" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "7f79903dce6cd047d55b2f95bcc2a0a8" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "57ef3f50a09da93b5ff47e2ba69a8475" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "71b4dc8b594490a2a01c365f057ebc01" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c16f2f2632b07547dfa24ed857c5b91c" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "70b5e27683d25f3557dc3d725381ce39" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cb9473b603671ecc8f751f6d2365d690" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d4940c498a8c6ffc766c9c74c66b3bec" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "ed32eb3c8f3ea960ab896c71e80b8f4f" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "264c6abb3f313f8c35bcc1f645874247" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "4a58e29d9d808c81d03d3dbcfdfb045c" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "8c6ebcbc0c1edb82ae38834f3d8ab857" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b5d0ad64dd22f1bd25e4d715657998a0" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "c54e8b92ec26f7e95adaafeeaebca69d" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "7bcbf6a51b09e9921db3ebb6f7e102ef" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "a378ea29de16509b144c46a7a612d48b" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8d9148fa170a4e45500236c78f715992" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "660c1153b28ba3b74a41c4b4f4578026" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "62a89509dcbb71a0414eb294f7ae59fc" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5fb895b67f51aa42351d69bc644e5628" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 32505856, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 11010048 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 18350080 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 30932992 + } + ], + "md5sum": "47ee113afc6d40909e76f51b73a8cc89" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 9437184, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + } + ], + "md5sum": "af1ad7660baa5e1b279f1b85da305a14" + } + ] +} \ No newline at end of file