diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4311 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3048549376.0, + "BitsPerParam": 2.5538732071354877 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 52736000, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32000, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 52736000, + "byteOffset": 0 + } + ], + "md5sum": "7f67cb8c6e5505c50ee5c8e1970a09ef" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "b1fdf1cf62f1d2e47081d4aa066d4a61" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "fcf3a5fb21c9ecacfea204f40b316621" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31500800, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32000, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6592000, + "byteOffset": 0 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 6592000 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 6600192 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11135488 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11143680 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 11151872 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 29239808 + } + ], + "md5sum": "2b0c03123e554936c92a8cf67f6997be" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "7cc9313ab5a9c4de5be3acf2d5aca401" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "006055412f9c85c4e9987f49960cdbd2" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "71791370902ffeffc4008daac3286da8" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "92d42491cab29547e76bdcd56c3f2b26" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "5c8c8dbfb8f8e65ec45dca1ea002935e" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "700cc613612daa123563c4043956f996" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "3e954e202c23d3e78e81c833b4dde922" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "e158b29ab770a8444d75888e44fa9557" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "580f79470116fb050e2cc40db2e1d3d1" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "115f8bbee9577fa147c999c52a28790f" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "7d81b8f15fc193c11b88795c5713334e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "2f0d7cf3803c0a92410ce72db71dedcc" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "7d823963e3868f4b5f6990c6fd11741d" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "335419cc588b2367c41ecdfca33ab78f" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "562bc79851a408c4ee973418ca980f77" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "292ef0fdf9bcb0a65faa07439d63605d" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "08667ce83e35e8378dcfdb3793808040" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "9d48a18a65dec911b63af99666aa5178" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "ce9c3905bfd5724b422a34cbf7b73efe" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "9f2674ff4f5f95d89146ba9e2d742db7" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "1071add4d623739bb71058e48527ad04" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "a815120a564e4cb20d1ef765a6a5ac81" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "7317a4b2f3e5c44fa33ee96f349a6afa" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "6d066bfa24437c68365bcc5cf34bc4d9" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "4082f2e536ace9ce9d34be40c3b73068" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 52736000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32000, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 52736000, + "byteOffset": 0 + } + ], + "md5sum": "95f3558b32a5d7aef7d33f4755b1cd04" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 32290304, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32000, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6592000, + "byteOffset": 7602176 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14194176 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 14202368 + } + ], + "md5sum": "86834f48c8e88499de3f0d74ba3dfc6d" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "37d20af406cdab61d449bb2e4aecb8bb" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 29586432, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 2260992 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 6796288 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 6804480 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 27055104 + } + ], + "md5sum": "b9e281d8ea72ebc6f05294de99a299aa" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "ba6dcaee7d1e957b29f145a71ec8aadb" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "181bf34e268bc2464bf5144e8b876a66" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "ec4b51058311dfa0b54f087d14bfc164" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "4d7e3057e30a978cf3cd4d6794c415f8" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "fb1f21157ac043fd9e72eee134078542" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "db3a0d0acc25acbb12a96caedaf83a0f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 18087936, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 0 + } + ], + "md5sum": "5f821d2be3d18039c8d54192eca45537" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "c5a84aad16f4bda812e9bf1a6511941b" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 32645120, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 7593984 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 27844608 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30375936 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 30384128 + } + ], + "md5sum": "9ad04c6d7b23b84c6da9c52c98aae625" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "37bd0b9c2d6628bb1985b4ddd9000847" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "e1260d6df2c16b20156e08dd68887e77" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "d6b35e885f43d59f2b0c9b8b2c2d3ad8" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "7d4d2e3ad8cc2cc7933e7f4cb415bc63" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "efb2f4caf5045106641fcc446a6f3d8f" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "861cca4ea264d3cf99e5d91849918bcf" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "d9efdfbc9f0b85b97fe8558aa36f7cfb" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "b062eab64137f8d8226042898dd8c79e" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "a56f35c0f1c8f442df75712429a34279" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "24d466a9777fbaed22fd76609702f5ff" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "9e8be5e9a64a07a26c0894ee50ed7a81" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "d7e32d815061fa7f1b7d2b7bc60040d5" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "a9800661d63deb0eade5568382fc59e8" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "aba1d5a2a4f2705d463b123c0ae0a47b" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "223d105d59b48d99c4aec9af6b035bd1" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "87b81ed623c47ba1f097e2937fb4c5f7" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "a517eb04bd33442ad87032c69f65d16a" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "448175d0a5ef0c1cc348001ff2b91583" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "ada764d26b22c88cd2f3d7700c9054d9" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "37c166684a4bb901e88713357146cea5" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "db6904af780776a12a1689a6e04522b2" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "2250e259c481aff65d3bd111ea357242" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "02a76d2abb10e41fe652d94254f7b4f1" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "d22a3c07b839aea9e8bbd2bb293e8c0c" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "75cc8b0e12855ee1e0a893cfb63f10c7" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 4543488 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 11293696 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12137472 + }, + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 12145664 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 30233600 + } + ], + "md5sum": "83bf6a1c58c7a5833fed9d1596c30238" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "9feee3ea088590eddb23d1087196e2f5" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "dfb46741d6935083e31a53fbb3ab5dbd" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "035aadc265c894e2eecd420964efcdcc" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "60cdcff99209c52c1ecbc0787020f338" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "d771721dbbed49a3e35fcf72e51238a4" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "2b9aa24dde37c8fc7e6f564723e2f9b2" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "5bc7cb1ce49af013c51c1239e3a9f4c9" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "2b9ee4cd1dfbeec260c904043c43f63a" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "367ffe3b027415311194de0650f979ce" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "d7297071db694cbaa0d67a9dc6f674e4" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "db1b360554f8dc2af73506a285a67da4" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "aa43ac8a9ae4bf7e298b85a239763258" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "d56b497f4fb640bb650dc402834d1d6e" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "9772dbed16ae9bfa9137fde1a089e96c" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "5d51fa0f8f85b0b60b2ce56033a391ed" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "7cd3b362ee9fa9273f2a7838d2f1cf75" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "f77a11d1917c2be5c6ce2f3f41e8260b" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "a78e69995e9516c3c1ac6d33c098de61" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "0e7b4ee4854039dde8072cadee3ede75" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "fc293b47f270f590db9d64d7387be2b6" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "52962b364ac517716157337e351e72c6" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "e86ce7ab92b12c6c7a5b0c804895126b" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "831b1395d2fb9be8b06918ff6449a5af" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "d2ce979022a5e600b9d135e7e1db82c9" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "5442fe4f578202a66a5657d7ce6826e5" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "52bd3a883285026f50ee18dbc98a4845" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "5a6fb54ee7e5e4f006fc6dfd74a7aa85" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "d01558f6d7176cb33c1876d1a15b7bf7" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "26fe36ea755a154224f104e519a6fac7" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 33005568, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10125312 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28213248 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 30474240 + } + ], + "md5sum": "e134b4b74571f3871ac7944a5a32e93d" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 7593984, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + } + ], + "md5sum": "18726bb1bca7c943f4967bcc38dde874" + } + ] +} \ No newline at end of file