diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,2959 @@ +{ + "metadata": { + "ParamSize": 269, + "ParamBytes": 348686336.0, + "BitsPerParam": 4.502299813734094 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 77791232, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 151936, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 77791232, + "byteOffset": 0 + } + ], + "md5sum": "42d2cfcd9fed316810c440d96f6458ed" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 77791232, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 151936, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 77791232, + "byteOffset": 0 + } + ], + "md5sum": "42d2cfcd9fed316810c440d96f6458ed" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33329152, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 151936, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9723904, + "byteOffset": 0 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 151936, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9723904, + "byteOffset": 9723904 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 19447808 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 19449856 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 20891648 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 21071872 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 23955456 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 24315904 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 24317952 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 24324096 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 25896960 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 26093568 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 26617856 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26683392 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 26685440 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 28127232 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 28307456 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 31191040 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 31551488 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31553536 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 31559680 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 33132544 + } + ], + "md5sum": "fbc2672d314896d71cc840b2aa6b29c3" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31156224, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 524288 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 589824 + }, + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 591872 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 2033664 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 2213888 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 5097472 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 5457920 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 5459968 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 5466112 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 7038976 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 7235584 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 7759872 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7825408 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 7827456 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 9269248 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 9449472 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 12333056 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12693504 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12695552 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12701696 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 14274560 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 14471168 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 14995456 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 15060992 + }, + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 15063040 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 16504832 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 16685056 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 19568640 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 19929088 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19931136 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19937280 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 21510144 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 21706752 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 22231040 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 22296576 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 22298624 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 23740416 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 23920640 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 26804224 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27164672 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 27166720 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 27172864 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 28745728 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 28942336 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 29466624 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 29532160 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 29534208 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 30976000 + } + ], + "md5sum": "3be6a06a2ddba47e82d103faa3ee90f1" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 32194560, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 2883584 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 3244032 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 3246080 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 3252224 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 4825088 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 5021696 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 5545984 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 5611520 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 5613568 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 7055360 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 7235584 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 10119168 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10479616 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 10481664 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 10487808 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 12060672 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 12257280 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 12781568 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12847104 + }, + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 12849152 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 14290944 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 14471168 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 17354752 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 17715200 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17717248 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 17723392 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 19296256 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 19492864 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 20017152 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 20082688 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 20084736 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 21526528 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 21706752 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 24590336 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 24950784 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 24952832 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 24958976 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 26531840 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 26728448 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 27252736 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27318272 + }, + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 27320320 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 28762112 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 28942336 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 31825920 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 32186368 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32188416 + } + ], + "md5sum": "87d07d05354161f7edfc9dbfe96839e3" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 32925696, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 1769472 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 2293760 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2359296 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 2361344 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 3803136 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 3983360 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 6866944 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7227392 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 7229440 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 7235584 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 8808448 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 9005056 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 9529344 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9594880 + }, + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 9596928 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 11038720 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 11218944 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 14102528 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14462976 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14465024 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 14471168 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 16044032 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 16240640 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 16764928 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16830464 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 16832512 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 18274304 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 18454528 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 21338112 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21698560 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21700608 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 21706752 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 23279616 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 23476224 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 24000512 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 24066048 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 24068096 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 25509888 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 25690112 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 28573696 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28934144 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 28936192 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28942336 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 30515200 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 30711808 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 31236096 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 31301632 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 31303680 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 32745472 + } + ], + "md5sum": "43aad0196c77ad98a93059f0cefc9705" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 32194560, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 2883584 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 3244032 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 3246080 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 3252224 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 4825088 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 5021696 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 5545984 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 5611520 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 5613568 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 7055360 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 7235584 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 10119168 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10479616 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 10481664 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 10487808 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 12060672 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 12257280 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 12781568 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12847104 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 12849152 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 14290944 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 14471168 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 17354752 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 17715200 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17717248 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 17723392 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 19296256 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 19492864 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 20017152 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 20082688 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 20084736 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 21526528 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 21706752 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 24590336 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 24950784 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 24952832 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 24958976 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 26531840 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 26728448 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 27252736 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27318272 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 27320320 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 28762112 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 28942336 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 31825920 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 32186368 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32188416 + } + ], + "md5sum": "3cbc7c436548c14085870160fbb52e18" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 31303680, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 1769472 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 2293760 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2359296 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 2361344 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 3803136 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 3983360 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 6866944 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7227392 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 7229440 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 7235584 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 8808448 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 9005056 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 9529344 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9594880 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 9596928 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 11038720 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 11218944 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 14102528 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14462976 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14465024 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 14471168 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 16044032 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 16240640 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 16764928 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16830464 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 16832512 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 18274304 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 18454528 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 21338112 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21698560 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21700608 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 21706752 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 23279616 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 23476224 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 24000512 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 24066048 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 1024, + 352 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1441792, + "byteOffset": 24068096 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 1024, + 88 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180224, + "byteOffset": 25509888 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 5632, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2883584, + "byteOffset": 25690112 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 5632, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 360448, + "byteOffset": 28573696 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28934144 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 28936192 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28942336 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 30515200 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 30711808 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 31236096 + }, + { + "name": "model.norm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 31301632 + } + ], + "md5sum": "5916f2342a6d0275738cea6d66008b52" + } + ] +} \ No newline at end of file