llama3_final-q0f16 / ndarray-cache.json
Ericao's picture
Upload 86 files
0b97616 verified
{
"metadata": {
"ParamSize": 195,
"ParamBytes": 4351336448.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1050673152,
"records": [
{
"name": "lm_head.weight",
"shape": [
128256,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1050673152,
"byteOffset": 0
}
],
"md5sum": "a6945d49d5d076ecd0be132b67ce3cc2"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 1050673152,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
128256,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1050673152,
"byteOffset": 0
}
],
"md5sum": "f2d28da53eec7a4bfb2c19fb349ee23d"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "4218ddc28a6c0151e5458ba7060bc329"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29040640,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8192
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16449536
},
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16457728
}
],
"md5sum": "090ec36bde00654f6601bc09247a1b65"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "e7759d97a1d4cc522112872d2c8161a2"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "9809570a155894469f3cc79405dc5a8d"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "291eda3b1e4b9f31532375e52c9407e7"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "561cf7d055b90f2cfd15bdc5db58a259"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "c1a523898a3dfb8bfef948dde4cf7470"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "9c53f05ac65676773f007f265304887c"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "c653f88e4a80f89211dbe7fd5ff3c4d6"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "a06d69662fe87e6d7a19eb249b498fb1"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "f84ebe5cbed27d71559a7e534acdc581"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "9ebb199a29e44baa71cfff1e721fa134"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 8388608
},
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 20971520
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29360128
}
],
"md5sum": "0bb56185c02b5b8c711e2425d8025d01"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "aef8de5e993758b15eb5b6bd644e9e2c"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "1f955ff730579ec27c0ba9f2725af6a8"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "fb91b80d103ac9c19a0fe9014c8906f3"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "d8afc57a17e14926dfed09115af611ea"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "7f9df5d17af89ac535a773afa6ed0aad"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "8fbbae308233d5b8dcefdf356b59ac36"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "7b84900cc51857acc42b967e3d17436d"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "5782254d44ead3738e149c25e65c894e"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "1bbb109c73616ee7dc5ad55db9278bec"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "5a9ff46dec05f8dfed5ec08c03df20a4"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "c556c40103406d7b625b4cef4b6e4e0c"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "8b2c3718766480f0b7ead3c0aa0f3194"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "7e5fcef182a8fc479eafd028327f3029"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "a09ce869e92a907c189ac390c43cdb42"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "fe059ab06443ff0d20b60761f2947c05"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "0f1ccdced576f38fd1984f81f4e6a86e"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "14b13e11322c24b9d2fb5d91cbbc1bcb"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "7ce20ffcea44113c0e4da6eff81ab645"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "3e5c42660a330f61d5ea023f9ff21112"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "b0e99aa7cf74a0f7059e845dd938a7c3"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "329bfb01649ae4fd1572e51c24fae37a"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "9ead9b7622937a99bfc19fb6add515e5"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 32907264,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16449536
},
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 16457728
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32899072
}
],
"md5sum": "334a516d63f4ecc4b90a282302f7e344"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "12b8fd93a259e49cc1da9888bff7dcb9"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "eeb68f51f4a8fd42ef0b1bbf6bbc0445"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "4f51374c1bdaaa7ea44f1ce54ab93ed5"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "b7df8e3f453390e6639429ed65796e9e"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "10490442548d03e5bdd4794992f558e1"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "d888f03e1c6f16df666fd1573e62117b"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "c6acc6f3919997f064b0d3e13d43f89e"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.17.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "aa361d5bf8afbb6d768c6717ea06b172"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "2a775fb2ffb4d399f6f2504b1ee9eab1"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "76a7fbef1c176da272685519016863aa"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "3e6f4454d15d71856777d4c48f4ecc09"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "2b65d9043261724c95b5508a0fef14c5"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.19.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "44a2689a1c921c4d80b8e6684f25cd8f"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "738c9fed2a8d44d354553619baa2ddee"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "ef142e9f5988f1daab6502bd02c5f07d"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "a5c7a2cf1b431a400ac8101f6f940f5e"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "460612c5574cb3060b7218c717120af1"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.21.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "c70b0882804570266a24b2df87e4384a"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "6a1174b9cd0306313f1d4f823d2fa5f7"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "a629ec45894de8675367d79a49af73df"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "d4c35f96a6ff96d674a1592ab87a128d"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "29bbbc27a9ce703927cc98bcbdab12f4"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.23.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "5c803b0d1c1099f21ce2a8b6ad1c8d74"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "5127783bfc9f7c2a33dc77583fa40856"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "7eda786cee32e8afef5af73a4fa4d5ff"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "bba3a157abf834b900707a51b31da797"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "a7a7026dbe640ad0ce0e57590607999c"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.25.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "c9f61caa7b4abfd58f59e155b52d9cfc"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "de1e6c2f8abdb1f5e4140d9136c6601c"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "4bd1412e5f2385ef7b522688f1a5bfe2"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "74eec2216834e1dc51ac51fc2231f274"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "83df1774febbc8c39919872ccd3db2e9"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.27.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "54eba7af61e9fef7f41ec1930d34db26"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "25b9a6790c83cf06cc5912d3df0885f3"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.28.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "7c599262b59fc75d06bb1944aba844b7"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "aceb741d6e80f0951a4192738aa79822"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "d2c5e9964d28e1cb9aadde6a73b27e30"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.29.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.29.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "400237f6c3166c8be47ce13a6a65fff4"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "623ddc2870e252822e9857b22f2a7b8f"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 24846336,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
},
{
"name": "model.layers.30.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 8396800
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24838144
}
],
"md5sum": "3cc73317e96fc604524871e150a15676"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20971520
}
],
"md5sum": "94c1e7223dab086b2a0e221ce7372dce"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 32882688,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.weight",
"shape": [
4014,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32882688,
"byteOffset": 0
}
],
"md5sum": "4f58d86fd51763a2812ee020c952c543"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 29032448,
"records": [
{
"name": "model.layers.31.mlp.down_proj.weight",
"shape": [
4096,
2007
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16441344,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16441344
},
{
"name": "model.layers.31.self_attn.qkv_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16449536
}
],
"md5sum": "e5be9dbae7631dd8d285a9261efd7ea5"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 8396800,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.weight",
"shape": [
4096,
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8388608
}
],
"md5sum": "d622b6c2b03f084636e78fc408f5a3b4"
}
]
}