{ "metadata": { "ParamSize": 313, "ParamBytes": 3879214080.0, "BitsPerParam": 4.07500989461587 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 272498688, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 272498688, "byteOffset": 0 } ], "md5sum": "e67c832b0e8b5c4ae277c0c4752a654b" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 34062336, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 34062336, "byteOffset": 0 } ], "md5sum": "8aaffd7cef0304a9b56e761087309751" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "b114cdb812dc650ac33d978059f845c4" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "971659de687778fd2d064d1d2b6bdc75" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "738091766ce04bb71782e3e9f0ffffca" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "78c6b522c9ac96c582fc37ddf65dae4d" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "d3a482128d490d5fc280de6faabed558" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "42c60ac1ab1044da94ab5bde37342a85" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "9bc16ed6bf32fc0d31b02841d6968ff4" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29719552, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 7168 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14336 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 21504 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 97280 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 104448 }, { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 113664 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8371200 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8380416 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14802944 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14810112 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14817280 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14824448 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14900224 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14907392 }, { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14916608 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23174144 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23183360 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29605888 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29613056 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29620224 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29627392 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29703168 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29710336 } ], "md5sum": "36013e8acffd8602acab9e5bdceb11b0" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "1f4129a3f079b5c2b70f4ab95d7595c7" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "602eeb671e2203a1c94a70073396807c" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "5fd55a60e06ead1d4890c5af2288ffa7" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "069bdee321a12786c3721cfe2d2d60aa" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14703616 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14710784 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29499392 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29506560 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29513728 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "fc90e93d8d1d0293da441810cf9eccfb" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 272498688, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 272498688, "byteOffset": 0 } ], "md5sum": "1d340c9328e65098be1f7a6a9e21ed9e" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 34062336, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 34062336, "byteOffset": 0 } ], "md5sum": "8009ea47b18c21b2da5f5f0ef4891cae" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "bc151229a8af7fcb426758ef86d1fd35" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "e7a96e8f5cb1aeab2b61aa96dfc698a1" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "98f31d5624cb1115a7296a985da53a66" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "35f9010f951841a9cdb5c682e8fa8643" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29613056, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.norm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14703616 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14710784 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14717952 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14793728 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14800896 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14810112 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23067648 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23076864 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29499392 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29506560 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29513728 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29520896 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29596672 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29603840 } ], "md5sum": "65b57647aa1eb48370b9ff9710cafea5" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "41ae03bd5014b840c52c204ac9ff6ce4" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "2d99bd13fb36002d031ca05e8f01fad1" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "801c74407aa1286cd07c5b62e4e8b2f1" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "09b6aa5a514c70cc40887e744e1afdb9" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14703616 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14710784 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29499392 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29506560 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29513728 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "55bdd7eef38416482cbf5fe1d1af94d6" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "d5a3c8a5859e65a1978daec2f15df7d9" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "7b7891af9f77dbcd5bb33805145104a6" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "8172e6221ead03cd0c1be64ca5ce389f" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "8bd5c6f650dfa6db9df6912c0737754a" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14703616 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14710784 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29499392 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29506560 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29513728 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "eea925bcd381e5f820162fdf94e7c846" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "a37f92a3d738e06114948e402f22f921" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "2b2854419f911014dfaac60dd4e57991" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29522944, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14703616 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14710784 }, { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14720000 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 22977536 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 22986752 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29409280 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29416448 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29423616 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29430784 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29506560 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29513728 } ], "md5sum": "79c4e8f61024a4506e40732603b021be" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "d8c2cac00be25768ec50fccf229d71ca" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "1829fdb4763feae50a2877c863918aad" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "272f8486daf51567d160cd192ed1624c" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "c2ce108ccf36391a7f51d1393f2f6551" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14703616 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14710784 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29499392 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29506560 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29513728 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "b8e28d62962ec7e1bc9c0234663ea089" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "d409c9ea05d4219a31f7f3a9fdf6ab89" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "7c4fa4ad19488c873315be3eb96c646d" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "3428efc520365f08c1cb7157d949813c" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29598720, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14703616 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14710784 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29499392 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29506560 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29589504 } ], "md5sum": "aeefc44a38ffe316572b5f16c8fecc6e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "0960a1f95c3601bf252920f47ee0962b" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "9b78ee9b01319380bc9551fa95849d63" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "45e932f5fbc07de42552c58eece95dcb" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "d638935143ce8e9dd4cc57ad05ec96e9" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "eb8d5a02313990a9ec7ad23953fd1cbb" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "d23632a3a1089a16b9ec0a13a22a1ce9" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29688832, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14703616 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14793728 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14869504 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14876672 }, { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14885888 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23143424 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23152640 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29596672 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29672448 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29679616 } ], "md5sum": "8b99428d3d61592c0c2a635e98205d05" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "e027ce95a18e2974e4cefe0f5df835f1" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "a7f6d9f007907882cd25d52b2348f312" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "3b4bf8a0756727e2f5e853364e03c4fb" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "89342d044dc9af890b4208f574e1883b" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "f7f836517c543ff6dd9c1f4f6eb8659e" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29613056, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14703616 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14710784 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29499392 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29506560 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29513728 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29520896 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29596672 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29603840 } ], "md5sum": "a85a98866b340235e069025aa08458a9" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "2efc25421b1ac333e97254da97a8344b" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "e032974147334a7a0b98656a24cb662c" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "0d33b6d10b82e4e9d71dece9f13e1cd3" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "4a1519942a45a3e2c2deddc29c2cdf28" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14703616 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14710784 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29499392 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29506560 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29513728 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "ab7331ff34701ba080cc57c66378792b" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "6fd87d6696b54b0b5fd0dcf8fcb508c3" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "e2e1214ca04671489d17073c439de25e" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "ee28b730987b7fde90c7e90a7df845df" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "f02b99d8df443964b36a0d0c0211cc06" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14703616 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14710784 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29499392 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29506560 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29513728 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "7e3b211c2877245e5733fbfe64581ccb" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "adae781a50202954d476d2dafb807113" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "fbd7b62262b0819c9bcf87d14080fe4f" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "fc3e88d1a371c9f7896a7669e2e922b0" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "ca913b756d51aa984983cdfc2781bfa2" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14703616 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14710784 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29499392 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29506560 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29513728 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "bb7c72f0895d6c864423d9b09e4e0d8d" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "de0ae8f1303ae31861139ab685a369ef" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 29492224, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14696448 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14703616 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14786560 }, { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14795776 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23053312 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23062528 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29485056 } ], "md5sum": "51d36f80c7fe6386aaae33b50702aa9d" } ] }