pythia-1.4b / ndarray-cache-b16.json
Amai
Add weights
d29fa73
{
"metadata": {
"ParamSize": 292,
"ParamBytes": 2829787136.0,
"BitsPerParam": 16.002779603501143
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 206045184,
"records": [
{
"name": "gpt_neox.embed_in.weight",
"shape": [
50304,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 206045184,
"byteOffset": 0
}
],
"md5sum": "5c85d469d3a6358e6347e3d436bd31bf"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 25194496,
"records": [
{
"name": "gpt_neox.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.0.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 4096
},
{
"name": "gpt_neox.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8192
},
{
"name": "gpt_neox.layers.0.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 12288
},
{
"name": "gpt_neox.layers.0.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 16384
},
{
"name": "gpt_neox.layers.0.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25182208
}
],
"md5sum": "e9a26a7d91f1dd5aa9ec5a31a81eaf10"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.0.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e9f037184c8a59a774c5be6025e82767"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.0.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5e0386abd4410d1573e31ee499943b4d"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.1.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cdc3abdd0a16c79ecc23b3556937a5dd"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.1.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1418e5b945743e6a20885808db2f622c"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.1.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ad1cb92ebcc2ce96282f3c00f58da63b"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.2.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6b8d7161b41b9fed0e4ba52d80d9ec24"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.2.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3a5ab570c4ba3c5e0f33ec0c673e2493"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.2.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f71aba1427bca21e1d29d61946990120"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.3.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3da95990b08e04ca83c188524f5768b5"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.0.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.0.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.0.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.0.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.1.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.1.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.1.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.1.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.1.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.1.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.1.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.2.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.2.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.2.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.2.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.2.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.2.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.2.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.3.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.3.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.3.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "ad5604a32091a27b4f37422beb5c7127"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.3.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "58b49fd1d5e3a8d05d9c8b9ee95a1c4b"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.3.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1a1b8a67a671d406dd93492ddb815601"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.4.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0392972a04f5ae29ff6621ca610dae21"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.4.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2a8995d68b63ed87095c98aba7a0fd47"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.4.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "28471949d20439bca15dade58db44294"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.5.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "08271db9bdb1dd2c4c5bec8f957d7719"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.5.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5fa43ba0049345a1d2e8f7cf2bfb9748"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.5.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "670aa4e48da4b2d38001281fdbbc1be8"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.6.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c9fa78aca5c109288be40321e637f6d2"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.3.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.3.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.3.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.3.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.4.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.4.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.4.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.4.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.4.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.4.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.4.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.5.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.5.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.5.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.5.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.5.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.5.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.5.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.6.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.6.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.6.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "08321f9d815211578a56e0c10fd8b1e8"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.6.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c961341e57b11112ec7e90cc514afe5a"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.6.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "17ceddba5f67f049981dcd8a99248205"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.7.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1a871777026dc7718f612cfa9609660f"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.7.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "024d1739808880b33539763db416004a"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.7.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6f3e2d630526e4c3d78abed57e49bf87"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.8.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5f9351db9f5ab811bdcc91b70d61b6cf"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.8.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5245375f86c9d3ddaef4c0c6ba560545"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.8.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3163ff001e468b2f96a4eaa806436693"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.9.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c149b7484226eb92bd7e762609ac9877"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.6.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.6.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.6.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.6.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.7.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.7.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.7.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.7.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.7.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.7.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.7.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.8.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.8.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.8.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.8.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.8.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.8.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.8.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.9.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.9.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.9.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "7005f9c34f17233961dfc6edebaad8eb"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.9.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cd488d1027324116a2f63d7716938388"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.9.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e18919f3e55dabe26f4a95bafcfc5a94"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.10.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "81ba98beff1928f2878726b897b5f50f"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.10.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3fa173e12024e8f713959b35c584c347"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.10.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "91d488935d479fd044145ef160e622f0"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.11.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4691957bfad6f60abb5ae21aa65afc4f"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.11.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "89032d5755b98354f1f51711f5c16605"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.11.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8eaf9647301b564d89efd579dd7d0705"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.12.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "12de3c278d4729698b80bed6853dc94b"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.9.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.9.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.9.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.9.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.10.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.10.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.10.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.10.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.10.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.10.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.10.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.11.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.11.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.11.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.11.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.11.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.11.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.11.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.12.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.12.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.12.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "2d2c16e0f24eea73ed9c12ea2b48e044"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.12.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "bf44c2f3eea6c63a449002229e4f9b53"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.12.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8109f192e82840af3d7115dbb17aa0a8"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.13.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "44f1043bb9c30fe23a1d8e747e6c5a1e"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.13.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8acb05b2b698ef701894f2ba340b2923"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.13.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c05dedd7a76c3755dbbecd0190fce1e2"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.14.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f7a17b48338f6ac417b432292d9760ae"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.14.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a00ec52fc60d8b9f488a51121519d442"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.14.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "979b79166cc3ac7748525748ff9e847f"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.15.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4705f8edbeb4285aecb78f2d0ca96394"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.12.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.12.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.12.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.12.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.13.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.13.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.13.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.13.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.13.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.13.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.13.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.14.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.14.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.14.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.14.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.14.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.14.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.14.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.15.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.15.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.15.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "501211c07a1bf63effb67a9e7362c947"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.15.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5dc9a03450b51fc089323e231ec6fca6"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.15.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8b7f08d0cad3d23ef1919533b50b45a0"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.16.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7380f2361468bad81bee85c71981bcfe"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.16.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2e7e72f351414b0cdf813c051e1bc694"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.16.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "897b512dad54e52b28b30be80bfecd4a"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.17.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "da21123eb84a87b76b4907d34b7c24b8"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.17.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5b2bbf1b88d9c8aa5078f5680062f0b4"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.17.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "99872e9f912cd701b82243a90fa3775e"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.18.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "867e8116a0e289814e00f32d60cf3969"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.15.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.15.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.15.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.15.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.16.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.16.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.16.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.16.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.16.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.16.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.16.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.17.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.17.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.17.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.17.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.17.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.17.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.17.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.18.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.18.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.18.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "959b4278248634bc1d36bf764ee9eac2"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.18.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "aa8886f80b34d8f3bdc3d6d32caad484"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.18.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ac2ec07fd034b463dc3c993e6a043271"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.19.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9c7564b9ff65091721b121fadba64574"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.19.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "62b820ae1b7727969671e615d66c5d1d"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.19.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "37235d7f21ffa06e87f10ae12a99fa77"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.20.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2bf4b95431e7071409cc84d078e6a65c"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.20.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "35a5ea1a59e74788df302d8503f592b1"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.20.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5224dbd8c457753abe683ad6c2ea3b37"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.21.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c0a33934b7410702c8e3ae6a7f168e91"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 25325568,
"records": [
{
"name": "gpt_neox.layers.18.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.18.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.18.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.18.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.19.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.19.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.19.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.19.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.19.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.19.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.19.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.20.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.20.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.20.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.20.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.20.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.20.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.20.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.layers.21.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
},
{
"name": "gpt_neox.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25305088
},
{
"name": "gpt_neox.layers.21.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25309184
},
{
"name": "gpt_neox.layers.21.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25313280
}
],
"md5sum": "fb78987924454d684bac1b3ac0d37829"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.21.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "da22921dcc6b389d67d8b16d33788c82"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.21.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a94a9d1929172e22c2987dcda72606ad"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.22.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "36d5791b9f579063f3fa3e876a3f1da9"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.22.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6c148b412af5327503f4febdc05c729a"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.22.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "016f8c0d05ec55e6d1d5b76ec18fce78"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "gpt_neox.layers.23.attention.query_key_value.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "488de942e5bc384681f6db442f530b06"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.23.mlp.dense_h_to_4h.weight",
"shape": [
8192,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f8fcc3142e6b55b263cff50916a9d367"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "gpt_neox.layers.23.mlp.dense_4h_to_h.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "55f4840aaf65e171a34a7ec6d8024d88"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 206045184,
"records": [
{
"name": "embed_out.weight",
"shape": [
50304,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 206045184,
"byteOffset": 0
}
],
"md5sum": "520be5b8c74b70dfe677b2e5cb922484"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 25305088,
"records": [
{
"name": "gpt_neox.layers.21.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "gpt_neox.layers.21.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "gpt_neox.layers.21.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "gpt_neox.layers.21.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "gpt_neox.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "gpt_neox.layers.22.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "gpt_neox.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8421376
},
{
"name": "gpt_neox.layers.22.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8425472
},
{
"name": "gpt_neox.layers.22.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 8429568
},
{
"name": "gpt_neox.layers.22.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8441856
},
{
"name": "gpt_neox.layers.22.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16830464
},
{
"name": "gpt_neox.layers.22.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16834560
},
{
"name": "gpt_neox.layers.22.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "gpt_neox.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16855040
},
{
"name": "gpt_neox.layers.23.input_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16859136
},
{
"name": "gpt_neox.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16863232
},
{
"name": "gpt_neox.layers.23.post_attention_layernorm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16867328
},
{
"name": "gpt_neox.layers.23.attention.query_key_value.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 16871424
},
{
"name": "gpt_neox.layers.23.attention.dense.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16883712
},
{
"name": "gpt_neox.layers.23.attention.dense.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25272320
},
{
"name": "gpt_neox.layers.23.mlp.dense_h_to_4h.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25276416
},
{
"name": "gpt_neox.layers.23.mlp.dense_4h_to_h.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25292800
},
{
"name": "gpt_neox.final_layer_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25296896
},
{
"name": "gpt_neox.final_layer_norm.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25300992
}
],
"md5sum": "9d75f956fed0fb0147e95ca9db3156a3"
}
]
}