diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4071 @@ +{ + "metadata": { + "ParamSize": 313, + "ParamBytes": 4284263424.0, + "BitsPerParam": 4.500503319461262 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 272498688, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 272498688, + "byteOffset": 0 + } + ], + "md5sum": "0d8f60eae6f751a97b8435b81e540c27" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 34062336, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 34062336, + "byteOffset": 0 + } + ], + "md5sum": "c3406ca71cee30aa1a37cf5985b8f18d" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "1ebca7d0c1fd215e0847ca859e44d0dc" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "efe56d38543e8dea268a1559a507527c" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "341cd3a5cddfe1235499d70650ffbb83" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "648b5d41e436a82340637aff2798d6a7" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 33533952, + "records": [ + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 7168 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 4250624 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 4257792 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 4264960 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 8508416 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16995328 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 17002496 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 17011712 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 25269248 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 26301440 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 32723968 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33526784 + } + ], + "md5sum": "f8ff26a10f253ebda5d435da17ae97ed" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "abc0500c27ed4f4c9c0db85870eda28c" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "784f10a64e790a4103ba2a328d61d296" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "afdcf750f465c0a21dce71be1a28bcf9" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33512448, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 4243456 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 12730368 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12737536 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 12746752 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 21004288 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 22036480 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 28459008 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 29261824 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 29268992 + } + ], + "md5sum": "0fb6ab3322ed287bb0b26003636557e5" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "0f62100e38997e7473bca7d3dfbb0d2c" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "8dc0b4f4c74b6eec68d217a0cd09b5ae" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "cdc779f572a424fceef0e2f827b7ea37" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "65bd48d9a817de1737fdd9c0f30731b2" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "e1ffb29bad8b769c9fa37d5f71a1bffe" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "697749df4523891808be97ae31ceed3a" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 272498688, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 272498688, + "byteOffset": 0 + } + ], + "md5sum": "3eda79b3c20a16104cfc3a810b2fe6dc" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 34062336, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 34062336, + "byteOffset": 0 + } + ], + "md5sum": "d633441717968a602bf3864b0e620ef7" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "0632e25863df5096fb6a9119fc89156d" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "1fda016e6c5f33f0fd276f74a53deaa0" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29276160, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.norm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25025536 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 25032704 + } + ], + "md5sum": "1a4731a5ab8501ef68ca2495bbd6e560" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "1d27e3c18dc2baeb04e493efae65a746" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "65e8455b878306a132b7a531143cad9a" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "96c129a6dc48d5b025922901a832b14b" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "b2eee205d302c7fb94873fcb2c21989d" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "bc4355482a99e16358d338f7e3731b6d" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "5abe325058198a6acdde04485609f899" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "b576e22b483eda84d3b2335e7aa431c2" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "65d22eb4e66f9505c2eed6ddc4af1e32" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "b22fba03039496f35440fc3743957d1a" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "53860c3e03e45706e45eab6ef07a4b66" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "b949998700ef2fa6b42991de2673de36" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "1e8a918e9d67766fdc703ce7987e6881" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "a7a16df11364d8f23b52c23cf6c2c1b4" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "5cb01e6a2c4f544bf80ca735b55b534a" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "7fefec10933b4693f1fc2373b76d42e6" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 33285120, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25018368 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 25027584 + } + ], + "md5sum": "2045010507c817e6dcf94b0d8b48177a" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "fe42d4588838dfc0b06dfb9ecf7a13ee" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "71a513755da3e8b5b7f0d9d8540ea184" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 30301184, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 1032192 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 7454720 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 8257536 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 8264704 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 12508160 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 20995072 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21002240 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 21011456 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 29268992 + } + ], + "md5sum": "cebe0e43ef4c85deaa6d4fe42642f496" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "2d1af027692369e76388347baf52c58b" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "e1f7af863e5c3d2f5f0775de111bdcd9" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 6422528 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 7225344 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 7232512 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 11475968 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 19962880 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 19970048 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 19979264 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 28236800 + } + ], + "md5sum": "72ce148022d72c29ee76d412b4cda63c" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "0013edce307e442c38fde8d635a66c94" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "2634bb985e454bdf1517001eadc9375a" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 6422528 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 7225344 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 7232512 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 11475968 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 19962880 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 19970048 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 19979264 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 28236800 + } + ], + "md5sum": "537d0c87708150d62f52f39603ff61b6" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "766ac835fbef78418752a22e8c59e4f7" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "7cd5c8839513ab12a300213e8bf76866" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 6422528 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 7225344 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 7232512 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 11475968 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 19962880 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 19970048 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 19979264 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 28236800 + } + ], + "md5sum": "07020932b88efd9dc8a8b808afbb57e5" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "867f8c7405edea33059cd7a7e619d50a" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "658e08c93edcf908a23b727310278a75" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 32243712, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 6422528 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 7225344 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 15712256 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 15721472 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 23979008 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 25011200 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 31433728 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 32236544 + } + ], + "md5sum": "0941ca267e115ed8e90658275e262081" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "3b33899769fdeaee8f59e5137d1d2282" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "1525b3ea6661c399342e353fb7f0e49f" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "6a6989475f5f7cfae48c6ddb1dff1371" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 25491456, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 4243456 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 12730368 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 12737536 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 12744704 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 16988160 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25475072 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25482240 + } + ], + "md5sum": "07718c1e3e5702d404a05ad6fc4c2e30" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "a9176a5325fceecda9f25c591d463152" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "bb0348a3420520afc126ecce3ecdd0d1" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "7466ce50e8bd5bf67a03f59526d3ae0d" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "c4e70a149065ffeaace288c1a5d4e089" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "4a878b3655655426fb25ab3c87b020e6" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "a13534f8c484ae726948edbe8fb55fc6" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "61eadbb619d29f7d4569a507f8ce8642" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "4fd60eb59def008759e7cc57a314442d" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "8470851d5ca00027dc15dda8c01e8d92" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 33526784, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 20765696 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 20772864 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 20780032 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 25023488 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33510400 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33517568 + } + ], + "md5sum": "31977994418d884afa6afc4b12d1113f" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "132c23db8145373df54db47243c5841f" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "98343a0908ab1695c4267e0c759e0e72" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "c629b53cbeaa86043bd3141ce337e453" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "27b721a52869427e9cbd342d6a9e2932" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "99ab5ba735ccc6cc7ad8c2a2a3a369f7" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "da5cd719a15bf279e56fe27e5da7693b" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "c35adfb50a33ad283b896efe1777f65a" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "dfeba30d374881ecbccef8f8747a0c08" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "fe77a9273e1c6a055a6ef3ea8b14316f" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "166c828c08184d85d8d37008c05b82d6" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "96a4566a656c97a19c85c2ec9ec66508" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "1883dd59a34a1b51163a9b42b511df0c" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "84baeb604ab2f4911265a3b410df7a48" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "113c955beef654bf82a594567d25546f" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "e1f46493f00364d56fff5dce2b91a3f5" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "947b27cfcee2a76548d661547141bd77" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "426c3433f2c580c0292faeceeed12d25" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "28fc2fa3ae16c4daf13143c676f754e4" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "f0627d5d5cb382fac3d5eb2d230122bc" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 33268736, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8486912, + "byteOffset": 16515072 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25001984 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 25011200 + } + ], + "md5sum": "75f2123ec5afe8cd2df7f06dd895fd69" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 8257536, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1032192, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 1032192 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 7454720 + } + ], + "md5sum": "a8c853eea55d2547d6a5089801e8469b" + } + ] +} \ No newline at end of file