{ "metadata": { "ParamSize": 325, "ParamBytes": 3048549376.0, "BitsPerParam": 3.619307029695688 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 52736000, "records": [ { "name": "lm_head.q_weight", "shape": [ 412, 32000 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52736000, "byteOffset": 0 } ], "md5sum": "07f8e55ee35dd00e8f86c07782831d59" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "8c8924c4b1b18cb8c2c3a6317e9311b7" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "33efdffc1b6af95dbb3c7662daf303c6" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31492608, "records": [ { "name": "lm_head.q_scale", "shape": [ 103, 32000 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6592000, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6592000 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 6600192 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 24688128 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 26949120 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31484416 } ], "md5sum": "014baaf8004e09d80979644eb5ec1c54" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "c338611843ea40c62cb24b6e4c93cbb6" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "8da9f9a4295e2d3f5a78aa0e84653166" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "ebff3a5b74713ed1c1598e05db64588f" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "bbd4cf405a5bc7bf3dc59196a10111d3" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "ede45b002a11271404cb56003536786f" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "b93fee99e544bea6ae287d733ebd01d3" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "32a08f29bb221aece7e6d5af581be687" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "b89dfbc15b98c44fb1d0b8c3528bd92d" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "b0f904587eb07e55f953e91cda01f522" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "9ac9441c2d7a71eb10362af76fc21f45" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "fedd6aa7492daa7b84d3127cd5279e60" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "e47108f54248fcf890830ab17325cf10" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "f0072616dee633be7e0353a1ec666338" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "3c3feb0be5448e5de8a785c63bb07275" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "dade4b0908536a6fdccbfa2bc0d7d4f9" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "11d7e812a942b26379d13516250afe92" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "bc69fff619c6d4176ef39b17ba5601ef" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "f3f086b103d81adec7f7ccb3a5a82b34" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "40ee8443d3a9c5ca5ba0528d19e0d735" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "6089df8d8ea3e30dda7faca94fd6a7f2" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "c8f4c86e79a99b6c27598c1a293b4203" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 52736000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52736000, "byteOffset": 0 } ], "md5sum": "d1ddcaea2b2921ddbe501527c197dd71" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 32290304, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6592000, "byteOffset": 7602176 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14194176 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 14202368 } ], "md5sum": "6a9299266ad2749026b9807da9fc79dd" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "0e176568fb4600bf8e2ae1171492c732" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29586432, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 2260992 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6796288 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 6804480 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27055104 } ], "md5sum": "cb5226c2789aa3b257c4c6c282ff74f8" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "cf6dda7feef58c9ebd891ce80e70696e" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "6170f936c8fdb4d929d697cd2fe5f0bc" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "cb19c222e49ceb3d605f18e0dddf8c88" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "997cd72ab22e9e4c006ff36b4b535f89" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "41fc28efcf19bc0a4a957a4380cbc7b7" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "a58ef4bd17bf66d27d834579ec046177" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "7485fb56ad430d6dae9aeef0077691b5" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "f8eafaf364bf582016c90b99f5f64d03" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "4b31bad9b66b474a98a01bf6fbd2ea7d" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "0b49dd2ead668760eaba10a2550a62eb" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "37d6b78521b049226a2380cbaa3d12d6" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "17752870d30585ab75924279a5391832" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "3a25c3c068f14c0f4a921a9eef3baa3c" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "b399b8c263550a0477c24784f8fc90a0" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "29fb0d329829424c32f2c6e4cc96343c" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "7e2dd5062672a1f21c5ecdfb850b932d" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "a94778c911505dd60d440fb5fa77e8c6" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "aba8ad42e51ce1820f5ac17404bc6230" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "0bcdb8d6420f8cad7da4ee1f4e19d123" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "02bfa3e425b0a24930624a54c5dd2bee" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "cbe33aed3e352e75518ec0e2ec71887f" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "0640e445724c7d470b17fe6e3d2ab51e" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "b968a802eb13f012a8ac1daddd4b462c" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "334a6dfe5f04de64cd375266cb2e1028" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "671348351c8d3facaac89c7484fe67d0" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "059bc02c6f588a8eb3fb0d3a5df6823a" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "7d59a720274bf7f3d2c0eb4c17c4d643" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "5ade31c6ff5fe904c3d8f16c36c19d50" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "82a3cca5de13afdc2e99d944a6a37d94" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "ac862c2f9287918adbf697ae3165b0a0" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "8aacd7df5d3591a37bf68a9107e3c9bc" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "913c0a41d8d09a7dc84f63931b85c2d9" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "b342f3390e81dc2d6ba4ee6f27aedf77" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "0e7e99aef4f64ae58475f8716ee278b8" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "c66da7d9d01dc75b10cc91988b5be9a9" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "2b0373dbce085c407c71cb8e81b94b56" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "2e46d6ad6badc60a785880df8e1ca7f2" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "554ec4304520e245f1bf638b7b2bd742" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "87ab2571248d7e00a9b3e7b34184cf6b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "0feafaa40f87d81befa4a3170da607ca" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "9bbb03718be11e47ec709a6479bc7379" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "ee095c22c5701beef624544e384c9002" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "b28aff858a0cf03441dd7136f93e5a35" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "78091e2424e35bb47df4bd08ba92fa6b" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "4a26feed4c8b74e73ba7d8a3393661ec" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "035a01e1016360fbc79ad49793c9489a" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "274c758862ce360905927fd2724e31a1" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "925a02a7ff20c5f5f79ed6ed8181e296" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "e7c9d6604f23e96e464f2a63a4df91a1" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "c297ba633348549bc8684f5b5261ec57" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "f0448c76fffb6a19587f0af01c4ba60e" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "a57f89a172332fc8fb7c177c8bd5e0ed" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "94dee99ac1ad914c4edbe86c87b5b1b7" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "dc02e4323568bb8159364b46f0a0c490" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "a05c11d9bd4a3a39fd47c3fc5d0294f8" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "8fc34b2bd472b29ba356d948aebbadbf" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "e0c29c82d04fe7a8caf23d2ee849a106" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "4bacb5c87ad3fda5faf5e1ba6a9eb053" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "2fdc8c38070362b0fc8c399d3e2dc6af" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "5588b35b272d254821d56587b9bf1e08" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "196be6a9cb1621c3b17f7564e6261c91" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "26f3b40ee4efbf77a3ee80ce7b45d766" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "935a62e62c30d3f4e5bf045260c590d8" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "8e49bf3469a0e1197703bc3973179e53" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "31ffd7333f847aa2946757b68902806e" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "b1c53c33e63be8815d892e6b089ade7b" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "004dbf4412b46538d0adabd067931d97" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "9a498bb9fef5c24fad84780b62037db8" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "20f7b7d8f90fb15604cdc9c7c0d379e2" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 10125312, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 } ], "md5sum": "2ecbea5a6fcb4b97713059b075dff1ac" } ] }