pythia-1b / ndarray-cache-b16.json
Amai
Add weights
8121406
{
"metadata": {
"ParamSize": 196,
"ParamBytes": 2023890944.0,
"BitsPerParam": 16.002590914795338
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 206045184,
"records": [
{
"name": "gpt_neox.embed_in.weight",
"shape": [
50304,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 206045184,
"byteOffset": 0
}
],
"md5sum": "da22c1a38504f2cdb102f78567ef6329"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 25194496,
"records": [
{
"name": "gpt_neox.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.0.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 4096
},
{
"name": "gpt_neox.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8192
},
{
"name": "gpt_neox.layers.0.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 12288
},
{
"name": "gpt_neox.layers.0.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 16384
},
{
"name": "gpt_neox.layers.0.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25182208
}
],
"md5sum": "e2d7cc2fa350f011ed0dbee17a7ac7e8"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.0.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e4a3272a25b09a7f71da4953d754e374"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.0.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e458ab97e79a42f7d337f26ed979dafa"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.1.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "47dd12a2f3b316282581e8d851f16361"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.1.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8b376542ee52be1a671247bc19759893"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.1.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8d429e502c7ffe5efcdeff2840694943"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.2.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f6b6c9c4ce289692d543d75a0ba5693c"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.2.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "267dc9558c1f41aef67d0e1f022be352"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.2.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cee8c73decb5ea89ffbef7ee86bf8689"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.3.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "db045065ae5ef84f1464bc836113ef76"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.0.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.0.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.0.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.0.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.1.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.1.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.1.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.1.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.1.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.1.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.1.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.2.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.2.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.2.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.2.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.2.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.2.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.2.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.3.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.3.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.3.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "a300f069dd9a566a4d85894be26bd41d"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.3.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3bf00c3ee5d95ad98f32ea93b3b6ca63"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.3.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2bc23e26ce74f84d28e0c5f69bf408b3"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.4.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1aa107f47a4ca56262e01cec478fb5ed"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.4.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3c65c52252e8eb6acb5856d86e3bc051"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.4.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "311a1252100864f529064f11e902487f"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.5.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "011f5a5c7cb3f338e981f263ed52f81b"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.5.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "df4abbad1146d3fc204e354532df4c32"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.5.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "553c08ac1659b464bf8dba3f262303a1"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.6.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ba5ea4be88cb85fc9109dac76d309c05"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.3.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.3.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.3.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.3.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.4.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.4.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.4.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.4.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.4.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.4.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.4.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.5.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.5.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.5.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.5.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.5.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.5.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.5.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.6.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.6.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.6.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "0f848458ed7ba85e561c6ac6953346aa"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.6.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c483f21a9a023e8537dcb3a97635ac32"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.6.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1b8f8a2187e8fe0821a62f6aecc43903"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.7.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0c3a014c3c16ecf900cabc943c57a8b4"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.7.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3a068195dd649252ba4aab5ebff92b9c"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.7.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c32830c1ad07599365b35bdb97040713"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.8.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "517ecd0e8410e75bd7b3be0f44d4f656"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.8.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4513e52799f3966b3090fe8f2c198570"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.8.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "20f134a1a67f8da542a1594f8d145687"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.9.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7cda7ef64a1611eb36cff4f88fa70fd7"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.6.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.6.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.6.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.6.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.7.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.7.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.7.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.7.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.7.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.7.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.7.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.8.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.8.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.8.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.8.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.8.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.8.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.8.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.9.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.9.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.9.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "e259ef76f92def9684153bc75388edae"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.9.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4e2fa9d7b430b8093610199bacc9e7a8"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.9.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "833726affdd62d945fea452daa4be81e"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.10.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a90521567e0f31a50ab8ee830a33ec11"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.10.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2017f8b0fcce69d68a36eddf4f065fb6"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.10.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1bb0df0f78e29b562fb80e9416f2e6d0"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.11.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c5bcae6dd378facfbb316629a353996e"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.11.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "77116769d32f06f166da36ca2a268c64"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.11.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b9e187b32f191e5cd83521eb59d918fd"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.12.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "010608c5a52ef822e207474b2f209681"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.9.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.9.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.9.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.9.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.10.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.10.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.10.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.10.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.10.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.10.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.10.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.11.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.11.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.11.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.11.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.11.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.11.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.11.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.12.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.12.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.12.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "9d0c8914ae6f3f6fcfb66ac13eb4d0a3"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.12.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ab87a989c27431535f15efa4e031aad7"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.12.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0dfda91cfe8ba98fff5a402cfe1dc7df"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.13.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "287808f7646050482dd9d4f73f3d71e1"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.13.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d556cb9c06f1ffaf060b18a650d7eec2"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.13.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c2e914507fcbf57fa0f6e2c0d0803550"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.14.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6dc1cb91a703d693796b3135b7a95ff6"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.14.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9905cbb2b08b5ffbc8b154005dd5ed0c"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.14.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "62120c836ef22bd90e4391cd148ccd2f"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.15.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f3756f5a5824708fd51f3de6e54449e5"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.12.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.12.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.12.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.12.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.13.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.13.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.13.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.13.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.13.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.13.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.13.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.14.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.14.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.14.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.14.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.14.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.14.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.14.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.15.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.15.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.15.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "b7ae6066328d6d58cdba7752718dea43"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.15.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "362174f589aa7225e0ed1396c68b612b"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.15.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "abb280cf7c20e138d18a8f9cf85322c8"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 206045184,
"records": [
{
"name": "embed_out.weight",
"shape": [
50304,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 206045184,
"byteOffset": 0
}
],
"md5sum": "3939e915eb3bd60919c8b6d517ea99f6"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 8421376,
"records": [
{
"name": "gpt_neox.layers.15.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.15.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.15.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.15.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.final_layer_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.final_layer_norm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
}
],
"md5sum": "4eaa42565d775e54b8a02497b2a7e15d"
}
]
}