{ "metadata": { "ParamSize": 325, "ParamBytes": 4065166592.0, "BitsPerParam": 4.826164192631324 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65952960, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65952960, "byteOffset": 0 } ], "md5sum": "3e9f0d8f4906d14e7ebb1bfd6d76b1df" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 65952960, "records": [ { "name": "lm_head.q_weight", "shape": [ 32016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65952960, "byteOffset": 0 } ], "md5sum": "f2788857360e466befebe30e5c7503e3" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "f46f09330ca90fabc7d9ac8afb05c109" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "6a580f3f776072e15f2598ce5f0cb4ec" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "cd631a9d2ef60ac84f454b41579da555" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "30737a8c65153e14512a94f6b0610238" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 31832512, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6595296, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6595296 }, { "name": "lm_head.q_scale", "shape": [ 32016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6595296, "byteOffset": 6603488 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13198784 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 13206976 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 15738304 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24176064 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25019840 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 25028032 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 29563328 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31824320 } ], "md5sum": "5ca000c3ed8c29be30f888fcc68cb19d" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "606750825d756ff72d308731d7999516" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "ac04a3deadf9baebf017e8d75fa7ce35" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "ead0e5361f59a0866644d3e56d289533" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "d19d5560674d55c540cc730e5f935bcb" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 30446592, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 2531328 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 10969088 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11812864 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 11821056 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 16356352 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18617344 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 18625536 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 21156864 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 29594624 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30438400 } ], "md5sum": "32efd138d9211984f5fd9ae8dec1ae11" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "cdd1724d334b2a249f164ba6a661d640" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 31945728, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 4535296 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 27145216 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29406208 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 29414400 } ], "md5sum": "42a816c729a3cd6b1d4fe8f22b9b3181" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "332329a61a6a6a9616ebb3ddf2d8ef24" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "563d28dda288cb5aab62425bcaebaf1b" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "a39447b95c746ca915301ba628c967de" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "c2cd37e6497384a1ce1c2588840cefe7" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "4cdc8b60338e88a8b07d92123afc283f" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "ed7474fb35769d0ee9a2bb46dc50479f" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "d8f0abf86bc7a2a530808e0051ca16c9" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "a9ce8f115ffbb0644f0e3183ad53914e" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "c3df7675caf4c3e5410a7fb78ea0057c" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "845a0a8137a01afb3d52c73e83d292d2" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "511c318725f0b294441e80a687cd5a2f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "4bfe587d4632d4a904fb8f6764a23e3e" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "0bed92fdf24bafa29e49f2e690bed37f" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "685e87e1c511185f404d135910914b84" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "d76b1a3017657f506ac390f0f49c637b" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "c888eb31a05cb69e2579c02f64d8585d" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "aaaa271ed5bdbf69204752bfcae23977" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "f24031b8a6b6ceb9f76e0e306a3f8fe4" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "467eae9a5071ead0928785399b8cbb05" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "2cdbba1aab18b69e3650b3c55218ad3b" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "c24932f73f3c31c359a1965bfa559fd0" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "881916c8a66dbf48f280fc0d813a16d3" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "a4b193c0c04327ba6006c83b9c7d1b79" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "3098bd5ac1aa7fcd27241e6c72aaecc2" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "fccef87bcf00e5b8cb405dcd4ff0269f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "c9e97f57ac8fa6f090c1fd5d266e8001" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "6ee20a846c1ab013c924c3cd964ff8f0" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "707cd6a398406ecbce8d25b8ecc69e50" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "ed6dd61a68ffe170b4cf4e0d9c2a2aa5" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "3f0c5c25ec4b342850a412d8ce46d05a" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "aa6faa03725f38e4f10f0f96b9b57c18" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "626cac8dfa60cf44e5ccf72957825529" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "9f9dd7b7779bf19271cc5db9ae44dd0c" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "ce9815749c9b25bd426c46a28ed350f2" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "87ac07c34bf90b3dba866889430a3d49" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "9e4b93cc276213a32ac8202d8a47b455" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "4d8fe0cace19f51b5a1f4bbbd97348be" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "f9b19cf4bfbfdf144e96e4cd0290d608" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "79c63f45e22636328f858a7eb1c2ad27" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "bb7d0e6a7d61cd974ca51657252ca429" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "91b5488d429c1369ab6d3a422ef1a203" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "8a6029d0b01f05cb678b0a812b35c410" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "106a680297319126b035a6800296b3c2" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "a9fef6091b79ec19d70b4d7d139c735b" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "8bad3d061476a9e9e09b798272e240e4" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "d1b42943faa91b04c12dad5b5729221c" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "f2db0a8e54fee87a80d729f6e4aecb89" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "9ec1bb71ef7b4df5c3c1fef82c618930" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "8d67be05359f6c4f110f937a2f7d3dec" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "a2fd9103fbd32a01030ceb3a49949f1d" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "d0e6e23e49c0dfa46a7ac9df23f407b1" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "5fa4f8a27b07319b8cdfe6fb04005f06" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "af7660dc3fa83e0b387965ddfbe3b296" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "3e3de0d62c36bd1c62e02d6932490b7a" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "c875e5605a68eeb8202ec84b474b8b76" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "9e83d14108237d02c1e6e9a651a0320d" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "a87ac6470eec8259de9b63222fa868e9" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "dac0f9fe3b6fd6ab74f8400072aceff9" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "5290e4371521d59549739c6cf585c18c" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "afabb12f1d30bd920dd20de09148d9bd" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "3457c563ce43c660d40d7fe7c800dc97" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "4ff3d7c21e3500017de37a2d39dd8d05" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "d4e3ea7d14cd24a8d4dd8d7e28757337" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "76716aa722eb9e6b08ddd8e5d36e206f" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "6aac28eff8708efb49de2fad6fe89cfd" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "abe29f9b083dacb5754484b6d678a934" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "23c404a556c2473da372fa5f68f38963" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "964d1a5ec4eb0f413b4900deb9abd033" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "31d54601cfcbb93fb846a1313e916637" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "d44cf9a33948c64f149e1099ca55d0ae" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "f2aba2742b0641f096672ab90e0b76dd" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "36ff7a8c6fc0c110e8c178e1f91f64ee" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "5630a8f833cebb2bcb9624fabd420b66" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "aa3b1f34acbeedaf13b646b10b2c9839" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "1f6c6a1520018b5266af675aa0d2f5d3" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "701533d620d7e4ad2d51fd0ff204a3c8" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "1db408bc08b30fc1128107cd0b9318ae" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "f99426c5f5c1cd3fd6373572d86b5185" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "d10d70cddd812838bd2a745ac8cb7dea" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "dbe197173042fdeb2065b759c37bb8f4" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "7558277f67d83639fc9c74be15c770b0" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "43a9026b68611eab9cb879fef9e1867c" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "c7ca1f5a2d73ab6e1625f5eef3343c65" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "24605f23111849e6e8c4736db8e2e5de" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "dbd7542eb8f91d407f2a2d497a905a26" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "c3b2a8508fe9c98f2951a84075ec209f" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "42c8b9d9a3523d664b9e082f521b3f12" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "aa6d57da91f815617026fdc4c748f565" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "97ce29229ccca74ae0795d66b563454f" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "3653d4594a72e58d421d30fb8d16521f" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "86a2da0e12fc7e468cf584f9c5fc429f" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "cdef0b5a701930bad74cd983b72e29d1" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "9c32e88d29c77796772eeb389147db30" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 25313280, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 0 } ], "md5sum": "5b8acff3a08611ca314507c5cba3ffc0" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "6d3dba68c6d157b6b2a7acae8c1b26e8" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "f9a68afaeae71fdfa1c6f7d25dcd17fb" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 32450560, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16086016 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 16094208 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 18625536 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 27063296 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27907072 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27915264 } ], "md5sum": "b8589adf09fef20aac0fffd8d26a499d" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 30113792, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2260992 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 12288, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25313280, "byteOffset": 2269184 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27582464 } ], "md5sum": "b7e45faffbb62a347d7b1dde5bf9bcfd" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 45352960, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 22016, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45352960, "byteOffset": 0 } ], "md5sum": "93e9c8b4f4d113e12eeb0496a4d7e16a" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 22609920, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1380 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22609920, "byteOffset": 0 } ], "md5sum": "4b25dd42c0e625baca5a046c22bb4c66" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 16086016, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 515 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8437760, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 8437760 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9281536 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 9289728 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 13825024 } ], "md5sum": "1daed501e1ea0213bcc63eb47097a986" } ] }