|
{ |
|
"metadata": { |
|
"ParamSize": 149, |
|
"ParamBytes": 435566592.0, |
|
"BitsPerParam": 32.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46881792, |
|
"records": [ |
|
{ |
|
"name": "embeddings.word_embeddings.weight", |
|
"shape": [ |
|
30522, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46881792, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2eeaa439340fec525d791ca37a3dd753" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30332928, |
|
"records": [ |
|
{ |
|
"name": "embeddings.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "embeddings.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 1536 |
|
}, |
|
{ |
|
"name": "embeddings.position_embeddings.weight", |
|
"shape": [ |
|
512, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 3072 |
|
}, |
|
{ |
|
"name": "embeddings.token_type_embeddings.weight", |
|
"shape": [ |
|
2, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3072, |
|
"byteOffset": 789504 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 792576 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 794112 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 795648 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 797184 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 1976832 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 1981440 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 5520384 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 5526528 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 10245120 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 10246656 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 10248192 |
|
}, |
|
{ |
|
"name": "encoder.layer.0.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 10249728 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14968320 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14969856 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14971392 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 14972928 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 16152576 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 16157184 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19696128 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 19702272 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 24420864 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 24422400 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 24423936 |
|
}, |
|
{ |
|
"name": "encoder.layer.1.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 24425472 |
|
}, |
|
{ |
|
"name": "encoder.layer.10.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 29144064 |
|
}, |
|
{ |
|
"name": "encoder.layer.10.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 29145600 |
|
}, |
|
{ |
|
"name": "encoder.layer.10.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 29147136 |
|
}, |
|
{ |
|
"name": "encoder.layer.10.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 29148672 |
|
}, |
|
{ |
|
"name": "encoder.layer.10.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 30328320 |
|
} |
|
], |
|
"md5sum": "d19301ea1b244630109761e9a47e8c0f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31896576, |
|
"records": [ |
|
{ |
|
"name": "encoder.layer.10.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "encoder.layer.10.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 3538944 |
|
}, |
|
{ |
|
"name": "encoder.layer.10.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 3545088 |
|
}, |
|
{ |
|
"name": "encoder.layer.10.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 8263680 |
|
}, |
|
{ |
|
"name": "encoder.layer.10.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 8265216 |
|
}, |
|
{ |
|
"name": "encoder.layer.10.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "encoder.layer.10.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 8268288 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 12986880 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 12988416 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 12989952 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 12991488 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 14171136 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 17714688 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17720832 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 22439424 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 22440960 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 22442496 |
|
}, |
|
{ |
|
"name": "encoder.layer.11.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 22444032 |
|
}, |
|
{ |
|
"name": "encoder.layer.2.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 27162624 |
|
}, |
|
{ |
|
"name": "encoder.layer.2.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 27164160 |
|
}, |
|
{ |
|
"name": "encoder.layer.2.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 27165696 |
|
}, |
|
{ |
|
"name": "encoder.layer.2.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 27167232 |
|
}, |
|
{ |
|
"name": "encoder.layer.2.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 28346880 |
|
}, |
|
{ |
|
"name": "encoder.layer.2.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "encoder.layer.2.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 31890432 |
|
} |
|
], |
|
"md5sum": "97adefcd2277d459f53c9bf2d25bf264" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33074688, |
|
"records": [ |
|
{ |
|
"name": "encoder.layer.2.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "encoder.layer.2.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "encoder.layer.2.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4720128 |
|
}, |
|
{ |
|
"name": "encoder.layer.2.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4721664 |
|
}, |
|
{ |
|
"name": "encoder.layer.2.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 4723200 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9441792 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9443328 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9444864 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 9446400 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 10626048 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 10630656 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 14169600 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18894336 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18895872 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18897408 |
|
}, |
|
{ |
|
"name": "encoder.layer.3.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "encoder.layer.4.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23617536 |
|
}, |
|
{ |
|
"name": "encoder.layer.4.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23619072 |
|
}, |
|
{ |
|
"name": "encoder.layer.4.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23620608 |
|
}, |
|
{ |
|
"name": "encoder.layer.4.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 23622144 |
|
}, |
|
{ |
|
"name": "encoder.layer.4.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 24801792 |
|
}, |
|
{ |
|
"name": "encoder.layer.4.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 24806400 |
|
}, |
|
{ |
|
"name": "encoder.layer.4.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 28345344 |
|
}, |
|
{ |
|
"name": "encoder.layer.4.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "encoder.layer.4.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33070080 |
|
}, |
|
{ |
|
"name": "encoder.layer.4.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33071616 |
|
}, |
|
{ |
|
"name": "encoder.layer.4.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33073152 |
|
} |
|
], |
|
"md5sum": "e9dd727b06f09c2a5284809b08e3eff9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33074688, |
|
"records": [ |
|
{ |
|
"name": "encoder.layer.4.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4720128 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4721664 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 4723200 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 5902848 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 5907456 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 9446400 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 9452544 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14171136 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14172672 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14174208 |
|
}, |
|
{ |
|
"name": "encoder.layer.5.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18894336 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18895872 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18897408 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 20078592 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 20083200 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23622144 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 23628288 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28346880 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28348416 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28349952 |
|
}, |
|
{ |
|
"name": "encoder.layer.6.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "encoder.layer.7.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33070080 |
|
}, |
|
{ |
|
"name": "encoder.layer.7.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33071616 |
|
}, |
|
{ |
|
"name": "encoder.layer.7.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33073152 |
|
} |
|
], |
|
"md5sum": "9fbad31fed16b934acb1158c464c9774" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33080832, |
|
"records": [ |
|
{ |
|
"name": "encoder.layer.7.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "encoder.layer.7.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 1179648 |
|
}, |
|
{ |
|
"name": "encoder.layer.7.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 1184256 |
|
}, |
|
{ |
|
"name": "encoder.layer.7.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 4723200 |
|
}, |
|
{ |
|
"name": "encoder.layer.7.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 4729344 |
|
}, |
|
{ |
|
"name": "encoder.layer.7.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9447936 |
|
}, |
|
{ |
|
"name": "encoder.layer.7.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9449472 |
|
}, |
|
{ |
|
"name": "encoder.layer.7.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9451008 |
|
}, |
|
{ |
|
"name": "encoder.layer.7.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 9452544 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14171136 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14172672 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14174208 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 15355392 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 15360000 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 18905088 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23623680 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23625216 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23626752 |
|
}, |
|
{ |
|
"name": "encoder.layer.8.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 23628288 |
|
}, |
|
{ |
|
"name": "encoder.layer.9.attention.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28346880 |
|
}, |
|
{ |
|
"name": "encoder.layer.9.attention.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28348416 |
|
}, |
|
{ |
|
"name": "encoder.layer.9.attention.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28349952 |
|
}, |
|
{ |
|
"name": "encoder.layer.9.attention.output.dense.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "encoder.layer.9.attention.self.qkv.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 29531136 |
|
}, |
|
{ |
|
"name": "encoder.layer.9.attention.self.qkv.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 29535744 |
|
}, |
|
{ |
|
"name": "encoder.layer.9.intermediate.dense.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 33074688 |
|
} |
|
], |
|
"md5sum": "7a49f1f4cfd562bb32cb568fc622b50e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 9441792, |
|
"records": [ |
|
{ |
|
"name": "encoder.layer.9.intermediate.dense.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "encoder.layer.9.output.LayerNorm.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "encoder.layer.9.output.LayerNorm.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4720128 |
|
}, |
|
{ |
|
"name": "encoder.layer.9.output.dense.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4721664 |
|
}, |
|
{ |
|
"name": "encoder.layer.9.output.dense.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 4723200 |
|
} |
|
], |
|
"md5sum": "2d4a5c41b3ca6eab2b402b5731cbc3cf" |
|
} |
|
] |
|
} |