{ "metadata": { "ParamSize": 65, "ParamBytes": 830578688.0, "BitsPerParam": 4.500414746671623 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "34491ecb22b5e7f4ca56d82527c438f9" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33357824, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 8192000 } ], "md5sum": "661089d83913d30e7575835461db0abf" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "78b992a9d971e604e70b413c233a60c8" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2120fd58f3fa655d99cc9fe71bfbee19" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c15d92613464fb1db85970571ee48e90" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 18219008 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21037056 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "dfe551efa483fbf60228e22987a26a32" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c68ba999a632e735721f525e36cb4188" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "567649c94813883b48ed1dfcc8ee6998" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 1048576 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 6684672 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 29229056 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32047104 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "573dff167a868366a699177dbfbc44c2" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "d369ce6ca2712ad0265c77bc31aca5c9" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c642cf007bf1c783ce4318c03dd865f5" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c677dbc5d6763d852ccc339f40d70a46" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 18219008 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21037056 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "2fe4e2d31ded29ecbc97713c15cf3065" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b245482ff81bc17b05d1deaba369fddc" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3c69628ac1609e53340a59ba0d171bf9" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 1048576 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 6684672 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 29229056 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32047104 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "756331ddf99ea961d49b8df7b6008ed1" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "faaae6a9aea2564ef9c18b82b6cc119c" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "325c866363e7b74b4ab16daf82c0c166" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "be9918024a9f24380a415b53edf2dd9f" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 18219008 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21037056 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "44c124c1dc4bc71dab03063e8902598f" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2704865ad9cc478fcb1d00bea211f60b" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "lm_head.q_weight", "shape": [ 512, 32000 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "3ab5243697098da96f215d9c7fe2a7ca" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 32071680, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 1048576 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 6684672 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 29229056 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32047104 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32063488 } ], "md5sum": "ca0268efd419df23435db72a5aa6133b" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 8192000, "records": [ { "name": "lm_head.q_scale", "shape": [ 128, 32000 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 0 } ], "md5sum": "4c61f062381dc9b41123ce288cc6a887" } ] }