Charlie Ruan
Add weights
96def28
{
"metadata": {
"ParamSize": 149,
"ParamBytes": 435566592.0,
"BitsPerParam": 32.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 46881792,
"records": [
{
"name": "embeddings.word_embeddings.weight",
"shape": [
30522,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 46881792,
"byteOffset": 0
}
],
"md5sum": "2eeaa439340fec525d791ca37a3dd753"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 30332928,
"records": [
{
"name": "embeddings.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 0
},
{
"name": "embeddings.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1536
},
{
"name": "embeddings.position_embeddings.weight",
"shape": [
512,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 3072
},
{
"name": "embeddings.token_type_embeddings.weight",
"shape": [
2,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 789504
},
{
"name": "encoder.layer.0.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 792576
},
{
"name": "encoder.layer.0.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 794112
},
{
"name": "encoder.layer.0.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 795648
},
{
"name": "encoder.layer.0.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 797184
},
{
"name": "encoder.layer.0.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 1976832
},
{
"name": "encoder.layer.0.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 1981440
},
{
"name": "encoder.layer.0.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 5520384
},
{
"name": "encoder.layer.0.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 5526528
},
{
"name": "encoder.layer.0.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 10245120
},
{
"name": "encoder.layer.0.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 10246656
},
{
"name": "encoder.layer.0.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 10248192
},
{
"name": "encoder.layer.0.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 10249728
},
{
"name": "encoder.layer.1.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14968320
},
{
"name": "encoder.layer.1.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14969856
},
{
"name": "encoder.layer.1.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14971392
},
{
"name": "encoder.layer.1.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 14972928
},
{
"name": "encoder.layer.1.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 16152576
},
{
"name": "encoder.layer.1.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 16157184
},
{
"name": "encoder.layer.1.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19696128
},
{
"name": "encoder.layer.1.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 19702272
},
{
"name": "encoder.layer.1.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24420864
},
{
"name": "encoder.layer.1.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24422400
},
{
"name": "encoder.layer.1.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24423936
},
{
"name": "encoder.layer.1.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 24425472
},
{
"name": "encoder.layer.10.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29144064
},
{
"name": "encoder.layer.10.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29145600
},
{
"name": "encoder.layer.10.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29147136
},
{
"name": "encoder.layer.10.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 29148672
},
{
"name": "encoder.layer.10.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 30328320
}
],
"md5sum": "d19301ea1b244630109761e9a47e8c0f"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 31896576,
"records": [
{
"name": "encoder.layer.10.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 0
},
{
"name": "encoder.layer.10.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3538944
},
{
"name": "encoder.layer.10.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 3545088
},
{
"name": "encoder.layer.10.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 8263680
},
{
"name": "encoder.layer.10.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 8265216
},
{
"name": "encoder.layer.10.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 8266752
},
{
"name": "encoder.layer.10.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 8268288
},
{
"name": "encoder.layer.11.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12986880
},
{
"name": "encoder.layer.11.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12988416
},
{
"name": "encoder.layer.11.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12989952
},
{
"name": "encoder.layer.11.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 12991488
},
{
"name": "encoder.layer.11.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 14171136
},
{
"name": "encoder.layer.11.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 14175744
},
{
"name": "encoder.layer.11.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17714688
},
{
"name": "encoder.layer.11.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17720832
},
{
"name": "encoder.layer.11.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22439424
},
{
"name": "encoder.layer.11.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22440960
},
{
"name": "encoder.layer.11.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22442496
},
{
"name": "encoder.layer.11.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22444032
},
{
"name": "encoder.layer.2.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 27162624
},
{
"name": "encoder.layer.2.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 27164160
},
{
"name": "encoder.layer.2.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 27165696
},
{
"name": "encoder.layer.2.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 27167232
},
{
"name": "encoder.layer.2.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 28346880
},
{
"name": "encoder.layer.2.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 28351488
},
{
"name": "encoder.layer.2.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31890432
}
],
"md5sum": "97adefcd2277d459f53c9bf2d25bf264"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33074688,
"records": [
{
"name": "encoder.layer.2.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "encoder.layer.2.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4718592
},
{
"name": "encoder.layer.2.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4720128
},
{
"name": "encoder.layer.2.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4721664
},
{
"name": "encoder.layer.2.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 4723200
},
{
"name": "encoder.layer.3.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9441792
},
{
"name": "encoder.layer.3.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9443328
},
{
"name": "encoder.layer.3.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9444864
},
{
"name": "encoder.layer.3.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 9446400
},
{
"name": "encoder.layer.3.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 10626048
},
{
"name": "encoder.layer.3.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 10630656
},
{
"name": "encoder.layer.3.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14169600
},
{
"name": "encoder.layer.3.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 14175744
},
{
"name": "encoder.layer.3.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18894336
},
{
"name": "encoder.layer.3.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18895872
},
{
"name": "encoder.layer.3.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18897408
},
{
"name": "encoder.layer.3.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18898944
},
{
"name": "encoder.layer.4.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23617536
},
{
"name": "encoder.layer.4.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23619072
},
{
"name": "encoder.layer.4.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23620608
},
{
"name": "encoder.layer.4.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 23622144
},
{
"name": "encoder.layer.4.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 24801792
},
{
"name": "encoder.layer.4.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 24806400
},
{
"name": "encoder.layer.4.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28345344
},
{
"name": "encoder.layer.4.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28351488
},
{
"name": "encoder.layer.4.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33070080
},
{
"name": "encoder.layer.4.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33071616
},
{
"name": "encoder.layer.4.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33073152
}
],
"md5sum": "e9dd727b06f09c2a5284809b08e3eff9"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33074688,
"records": [
{
"name": "encoder.layer.4.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "encoder.layer.5.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4718592
},
{
"name": "encoder.layer.5.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4720128
},
{
"name": "encoder.layer.5.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4721664
},
{
"name": "encoder.layer.5.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 4723200
},
{
"name": "encoder.layer.5.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 5902848
},
{
"name": "encoder.layer.5.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 5907456
},
{
"name": "encoder.layer.5.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9446400
},
{
"name": "encoder.layer.5.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9452544
},
{
"name": "encoder.layer.5.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14171136
},
{
"name": "encoder.layer.5.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14172672
},
{
"name": "encoder.layer.5.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14174208
},
{
"name": "encoder.layer.5.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 14175744
},
{
"name": "encoder.layer.6.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18894336
},
{
"name": "encoder.layer.6.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18895872
},
{
"name": "encoder.layer.6.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18897408
},
{
"name": "encoder.layer.6.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 18898944
},
{
"name": "encoder.layer.6.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 20078592
},
{
"name": "encoder.layer.6.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 20083200
},
{
"name": "encoder.layer.6.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23622144
},
{
"name": "encoder.layer.6.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23628288
},
{
"name": "encoder.layer.6.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28346880
},
{
"name": "encoder.layer.6.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28348416
},
{
"name": "encoder.layer.6.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28349952
},
{
"name": "encoder.layer.6.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28351488
},
{
"name": "encoder.layer.7.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33070080
},
{
"name": "encoder.layer.7.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33071616
},
{
"name": "encoder.layer.7.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33073152
}
],
"md5sum": "9fbad31fed16b934acb1158c464c9774"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33080832,
"records": [
{
"name": "encoder.layer.7.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 0
},
{
"name": "encoder.layer.7.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 1179648
},
{
"name": "encoder.layer.7.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 1184256
},
{
"name": "encoder.layer.7.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 4723200
},
{
"name": "encoder.layer.7.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 4729344
},
{
"name": "encoder.layer.7.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9447936
},
{
"name": "encoder.layer.7.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9449472
},
{
"name": "encoder.layer.7.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9451008
},
{
"name": "encoder.layer.7.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9452544
},
{
"name": "encoder.layer.8.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14171136
},
{
"name": "encoder.layer.8.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14172672
},
{
"name": "encoder.layer.8.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14174208
},
{
"name": "encoder.layer.8.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 14175744
},
{
"name": "encoder.layer.8.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 15355392
},
{
"name": "encoder.layer.8.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15360000
},
{
"name": "encoder.layer.8.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18898944
},
{
"name": "encoder.layer.8.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18905088
},
{
"name": "encoder.layer.8.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23623680
},
{
"name": "encoder.layer.8.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23625216
},
{
"name": "encoder.layer.8.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23626752
},
{
"name": "encoder.layer.8.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23628288
},
{
"name": "encoder.layer.9.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28346880
},
{
"name": "encoder.layer.9.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28348416
},
{
"name": "encoder.layer.9.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28349952
},
{
"name": "encoder.layer.9.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 28351488
},
{
"name": "encoder.layer.9.attention.self.qkv.bias",
"shape": [
2304
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 29531136
},
{
"name": "encoder.layer.9.attention.self.qkv.weight",
"shape": [
2304,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 29535744
},
{
"name": "encoder.layer.9.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33074688
}
],
"md5sum": "7a49f1f4cfd562bb32cb568fc622b50e"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 9441792,
"records": [
{
"name": "encoder.layer.9.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "encoder.layer.9.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4718592
},
{
"name": "encoder.layer.9.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4720128
},
{
"name": "encoder.layer.9.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4721664
},
{
"name": "encoder.layer.9.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 4723200
}
],
"md5sum": "2d4a5c41b3ca6eab2b402b5731cbc3cf"
}
]
}