{ "metadata": { "ParamSize": 110, "ParamBytes": 5012344832.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1048576000, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 256000, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576000, "byteOffset": 0 } ], "md5sum": "206485d87a22f62128d5b2494bafe7db" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0af16f8f82b18456b292f26c2e72f63b" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2ba31ef002c5e642855d91dbaf959171" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c7d1d36c62d66bea4696e965fff1917e" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "362ff38d501c9881a63569d0cfa88605" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4096 }, { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8192 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10493952 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18882560 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18886656 }, { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18890752 } ], "md5sum": "5d30e8d1452ab145a0564794c26cd791" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "fe63b3dbe2543296f8112fac2ade5290" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "629331e7d43d063ae03d036060febb5f" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "cd3f99f63fc39b1170222cb6f4717b72" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c9193e83b6a383f990b1d7d703a6f80f" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 27279360, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8396800 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18882560 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27271168 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27275264 } ], "md5sum": "59d0b4f1bd55ceeda6e5ab96633e9451" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "820b534e89d40aa248659ca0081f0674" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "256dabcc684ebacdfdbbcfa0adc035e1" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "7d790e2cfa6142474ac62a0ec5aea10c" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "908e1855eb654380dfcd3cfd85a1e631" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "ea87b38d7fde1e77b55f18aa11c0ed31" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f795202d6edb9f663561fa0cf03fa27a" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "88378b8b2905d8b91937c001c73403cb" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 27279360, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8396800 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18882560 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27271168 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27275264 } ], "md5sum": "071c71fc33170a2d3a1a2ae1f03b140d" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b58913c0a9d042f1469b2c4929efb8d3" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "7f3594a3e54595cf7f92b5901e0f6465" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "b7c100b48dd50547c5c74f03a68863aa" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "497e575ba76a1ae6e33393fd7c9c4c12" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "7e13a326187148b0ea54878998af1dbe" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "3e8d823c128094965bc0b63996bd30e0" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8396800 }, { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18882560 } ], "md5sum": "c51345fdd445c9c55206b86ee31d704a" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "708872d176d1deddd88435c817e93d2f" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "0eef3929b28890b79faf9d02dfa2487f" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "6328df0ffee1d1160e0fc6fa300f4c9a" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "30f748096a7ec18a6e349c49a7d14822" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "a15a4fe50c6c5b78f36ec6351732a345" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "5b697bbb99453187d0eee6b6127d2008" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "76327282d65a9c80614e17129cb9e5f2" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 27279360, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8396800 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18882560 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27271168 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27275264 } ], "md5sum": "1f73ecde85ffc44fa98fba7534c55356" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c983dfde03cb18ee85a7d80d50f4d165" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "afb45d7056b90f804e2c75489be5792e" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "a8c9cd2cc4d6179ffa0d181051ef4f7e" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "89e90749de11be2bc1ee29c5450a4f3d" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "bde36e490b2c3a905f40e1285f4873dd" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "eefc32a2ccf311c1a95e01ca979aaa9f" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c31b3371a8c2294135e993653a25d153" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 27279360, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8396800 }, { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18882560 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27271168 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27275264 } ], "md5sum": "a9a738d8b541d5d1517049e641a72c6f" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b4d55621d295f8693b39a501ea581847" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "1031e7308cb30a67747b65b4fdaa94af" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "f74050232fb038c2c717ca99af7cdada" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2639e2fc57901dac06e9d4183ce2ee34" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 32768, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "cdcce0c795b9df58125fb24aa22f62e8" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 2048, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "135969d29acd56ae5e9ac553cb476528" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 27283456, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8396800 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18882560 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27271168 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27275264 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27279360 } ], "md5sum": "7bca8fca53ef5ba8a977e4733d66824a" } ] }