diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3129 @@ +{ + "metadata": { + "ParamSize": 195, + "ParamBytes": 7642159104.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 197001216, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 32064, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 197001216, + "byteOffset": 0 + } + ], + "md5sum": "20f767987117b5a1dcebabf6ceb91a59" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "61f1db8af316e585e12d4e4e3cafc80b" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b50d9426082783bde745b574cff002a4" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "c733182448093f13578442060423b75d" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b0e7e33ce3831ad8c363d1fbc5a22b93" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9de05f8c7926df22a2425b8f4f7f469e" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "de4cd54fa91e31ff4a0c54bdf0336fb8" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "0db08c985271c24b7181aa45904b1048" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d02d32d6b68c620c83485f2b5a71f00e" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0cf699d4c967f1a65322ac6216e53584" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "d3104c9cad8990753682da4bc527b3d6" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3931ea704eea12fb030040fcca194edd" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "41fbee0d20ea0a6e26abbc5675e5cb51" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "620f3504e3c0521b706089a2af826182" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "846695f1cbdeea90f36693780cefd4e0" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "80d0f6ffe26b45cd6b208e0c35038f83" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5a35a17d62fe5090b3edabb1b7183127" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "bd3dffebfe2ae78418bf5d2b6dd4f56a" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "02a9f85bd57b6a44de5fdb1791f17bd9" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "63c8315f3e50f1c62d47f504fcfa8378" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "576272d67f3cebcd1bfb844e6fa1f458" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "538048ac07d0eb93c90d6b7e92ffeded" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "58001b07cdd83876bf7ca647f0cd3dec" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "151f95512bee7130689b227268b4a1d6" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e1f625cd7b2e37460c8c490460fecf8e" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e935fcaa47bc2044c74855e0287d6a58" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "abd971f2a5417f837893d81aa1dc1cc4" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "25c8ed95069263a500b44c49da89bcfd" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f110460661855ac871075364dfab4429" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "842cdc072cee78be502c84318a158d35" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "e79e3aa3437df414c4af28889a6200f5" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "370efa1f389053127f8b2af77a858b6a" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7a3c72bf046d2a21add3cbec89ead424" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "15c8c746f7e161af91eb4e63260919bc" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "afe888a5f5f722523d957cddc3473e39" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "97d464172344e5777e615d6d33a3955c" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "69cfffdc645acbac107fad5b0412ed92" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5a58ad89dd533102c784f675443873b8" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "ee622d307cf5e57b9ab05541f4bd3ffe" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "a632be1e7644cad9ac7a340d5101c992" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ba25b183d0421b40ec08efeb78f71448" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a365f25fa254776b829339c6a8978632" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "5d9f3d17abd4e3251daf4b4fc40a006c" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 197001216, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 32064, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 197001216, + "byteOffset": 0 + } + ], + "md5sum": "44a86159031c9fc01a395e20c3ae4506" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "6337d0a967d6d9de48a9632bdb3bb34a" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "47da65a6649337c973868098b688c5ec" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "6cdf005a73aedcdce54c391cecfe4007" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "4523b6093d311f4c2981978644cc447b" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9a8016c0f2c44a9fe895b5aeea776f36" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "91e0bea7c0cc087dab66b87c1e60f1df" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "cf316bb79d6513025b15970667e457fa" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "bac4b74453644003a166aa9e322bcf93" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "dfbfc42776e21df9a58a2adf15c33b61" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "586b7352fab4a30cb63980ffb2244001" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e0ae00acb274a5e7347cd89289331a1e" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "b1c9194613504621776bfd3b3d80bf7f" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "fc8ec96e63f5f581e88af35450a471b9" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c77eeb53cf21870890bac52f7f4bfc32" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d2264f7b47e3eb57189a18227c679988" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "c4a5d636ff6b53339dfafa7e70fafc89" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cc810331b696f85e1177a22ad5d0d697" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c91108c26fe3dc89e3e42431f3194d75" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "30f5b67a47cbb963021ef210cac88048" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "19542189aa95bb61987ac500652af343" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "93b58d640fcc541e074e36f2aba1124f" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "dfa86a99e50e8793e5adf55a1529d725" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "9da84cb116d5b77b93561bf75d1e6d4e" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "f8cbfa3395d170c959e8fea958776793" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cebb647ca6a068832c7ec017c341c978" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "78c330bd2414152be7dc2e911e957eda" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e4e48f04a3b1979e07387912700fe582" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "afa51c66d784081e691069445c104549" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3ed0d39c35e4f1aaf76c9fe80ac2e6f3" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ba05d36f1ad7ebc97458b4b722ea2306" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2217dfbe652654d167e69d3300a48c08" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "dca332b1c61965f2c062f7ed27a3b6a5" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b3073f405ca3bfc29f4b6d98f3237865" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "dfcef91847e9edebd7d92e4f40a51766" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c657399d637544a9536be949361bf07d" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "f5ea2ff98ee7ca89869e78f57013c2e5" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "c6fef22e70c8a9a23d65e8df172f9be7" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ef9593188a61ebb071d47d4aa8817549" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "311f97f74fb839a10ded0a8fa57293e8" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "a18776ba20cc4ba8dbaa04ed93c3c3a8" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9b4a42ee4be8368149e7f0833f9a91f2" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2a03b2facb7603946de71303ce0d2d1c" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4ef17cf3c9ad9a04bc54dac1e1da19d6" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "2fdf5b3a42ca9c376355527f80d8cbd1" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5427978f3daf565b02cd6521be626a41" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ca9f603a7c724662c7d77259f46e999f" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "fb0e661582e237b9d724bc508206cca0" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "14ea17ebc1a3b3bbbb60c933bd623f93" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "81ceefa209f60b613bc97f06fc95f19b" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5659c29c2380eb15dbb1fda0b80801e9" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "eee5db42e23a7549f45cd4961be23479" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "e3eaa729c1d8713926b584842d0737fb" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9a5c4c8beb921fb2565ce38ff8a02e1d" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6369b00d4e67b6b2a0821aeb1119b7ed" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "565b807f59830e3a5d74297f54599d46" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "f8357067cd64c23918897ed4a881fc5a" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8d5d78c2615021c387748b9f2f656162" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "caa81c64634b34fc00ee954eab5e5e5c" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d81d9883914d4ca194164f07883e8195" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5737de756b6a370ac814dfbebb2d3c97" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "327848c43160bceb9fb0a11a5b9c771d" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9374044fa1196bf136569ea5f5208628" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "230dc5eb061bdfbd5bd92c2f45ef4089" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "16d2105d6473164ae5d49a6aca32de90" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "370bd2f270f554214976915619b76df9" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "6ad7d02959b75118951cf941ed05e8aa" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b9818bce2082f0f49a7b7e7ef9fede56" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "84a51380cfbb01b37253796006d537b3" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "52e3aa55438f6ed0e2d4f118cbfd1007" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "8791c0e8d96e258f56814a5c8535c6c1" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "55c69dd16f67bc22b34572e969e29f29" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "33517d2e365640a76bd48a0cad228d6f" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "ac05e6011a5dbd96150cc70398ba4d11" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "147efe13dcfb6e752d318f82a9e14487" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6da52157ce76276914d2e16b46bfe378" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "dc2db587d5b187240cc086595e79d06a" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "918d53e693e991259a33bf060f3f15b0" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5ab2b3b46b94a2bdc489b4f2e6190029" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6e74f30bb5ff6e2658807bc792bb5d19" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "32e1b51953337eacf9c25db0c05ace38" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "af3fc95837e5539f51b645fb07d58675" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "49a5eeccda1b67320cb5ded507e2bea2" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "86c524ad766428959ea0e1d2552645a7" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e31b7260797f27c9adb2c5aeb0c6de18" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "51aae5483d993159f006f200fcc5009f" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 19273728, + "records": [ + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 6144 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12288 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18432 + }, + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 24576 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18898944 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18905088 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18911232 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18917376 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18923520 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18929664 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18935808 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18941952 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18948096 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18954240 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18960384 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18966528 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18972672 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18978816 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18984960 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18991104 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18997248 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19003392 + }, + { + "name": "model.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19009536 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19015680 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19021824 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19027968 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19034112 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19040256 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19046400 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19052544 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19058688 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19064832 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19070976 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19077120 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19083264 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19089408 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19095552 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19101696 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19107840 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19113984 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19120128 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19126272 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19132416 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19138560 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19144704 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19150848 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19156992 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19163136 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19169280 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19175424 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19181568 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19187712 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19193856 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19200000 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19206144 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19212288 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19218432 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19224576 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19230720 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19236864 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19243008 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19249152 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19255296 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19261440 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19267584 + } + ], + "md5sum": "516d0f4e466a17bb74caaadc17e4fc0c" + } + ] +} \ No newline at end of file