diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3521 @@ +{ + "metadata": { + "ParamSize": 283, + "ParamBytes": 1656834048.0, + "BitsPerParam": 4.069857841273854 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 197001216, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 197001216, + "byteOffset": 0 + } + ], + "md5sum": "bea7b3700665284b32c46cb59b820162" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d3d29e354396dabcf5fc284baded49db" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 32913408, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6156288, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 6156288 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 6162432 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 18745344 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 19138560 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19924992 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19931136 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 27795456 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 28041216 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 32759808 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32907264 + } + ], + "md5sum": "ff45501a865ba014c28b32ad75268ee5" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6e3b66dee6a9345df2b29b410fc2c614" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "2c41244b3f01d92520438ba4211cc2b9" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "cb653b1a932d23a45421fdaad39ae1cb" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "f3369a54776b240bc4fad808c40d7f7f" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "abf0a2ca79930f02077cefc5cfcacf50" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "57dba5c22847c32f46d7ed55aaf95115" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7b4f8bffe34f2473ba57750375a56e0a" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "5f8c53f4e2819df0554f99b5fb968c2b" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "676ea9c3a008f1730ecf1ccdea0a1350" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "7bb909e3de8973500ccae65cf0d2dc63" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8d8557f6da03f0e080b37204bd757c5f" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "3a1ac46757a30a88beebf622a0c59116" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b918e61bf49b820e4e175e291be839e4" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "79f9c6f01c4f8319211285775f6a5899" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "34c29e3dffc353e345e61bc70676d55f" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "1e71a816245ed8744e2a6e89d7bf1e95" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "35fe279bf2ee52f53a81b5008305c6e6" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "1007f8e89fcee1a6a45f2c746938fc5c" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d63fdf1034ae64773374ddb2ad8bcc4c" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "7c79dc78f088dbc8fa43b980f1fda5e2" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b0c11f8d44c556ba656fa228f6e38dc0" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "c2e4be67c44e700d994c8b23b64769b1" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f65411b7642b0b18c500b41593e4eebb" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "3ab080c0ccc4ea7ccbbdc19faad1e521" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 27531264, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 26744832 + } + ], + "md5sum": "a1159440770d9b311cc175ac65a2a665" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4c79578e94a1c351852efa723535a4f2" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "4b2c8ac996699ac73db771dff759e40b" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2bb309b7125cf3bc2a2059f002f3bf42" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "ded7a94a90de73e68e39f6835fbe519b" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bfe8195f6e187a6604628e31d5274a16" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "1231f6765917340d9ab57ea5f0525a67" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a7be16e23bd7a7b97ad5d2b4e1ae20a3" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "c59c76cfdc4baeae9cc0d2c82b07a37c" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "43e28bfa99c71771c541adcf711a5273" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "f09950a64d23304442167e514657a753" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "66b72b3e22eccb891a51111776e4c0ae" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "9450dce6ec5df60784d70c5c7e510b8d" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "95ed9a4257dbe1905d24da3e71b9b333" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "89374bee4f353b2fba6714e033cba37f" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25970688, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 25958400 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 25964544 + } + ], + "md5sum": "eed9ec82343fd1d07f54cef05be22a21" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1e264253b93ae69b85189d1f467a1ce1" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "6b354aa667e2f4d0e772364884cc53f3" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f2702337eb9b684de541c2e6c077706d" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "4419015cf4fcea010969597c9e73d946" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0d0724288b43879730683490a291d5d0" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "b5a962d01ec4070a162e181d94e75309" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b136d5fad594b1705841521ee621c691" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "7dda4c02b8f4837fc827cc9fc5b525e3" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7cd9318f54e3b6ab4bd5cd756d71f5af" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "ee53781c62fa7ccf74d2d32b33c24a6f" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "92c82148336d420832def3583431dd51" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "d06a88538d56b2f9c32637d86eed386e" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1a503b912f1e7d8cc8700d01cfb31391" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "012bdae88feb56af3ee300a6e04c58f6" + } + ] +} \ No newline at end of file