model-ya / pytorch_model.bin.index.json
mipatov's picture
Upload LeanAlbertForPreTraining
bb12a3e
{
"metadata": {
"total_size": 34491661488
},
"weight_map": {
"albert.embeddings.embedding_hidden_mapping.bias": "pytorch_model-00001-of-00004.bin",
"albert.embeddings.embedding_hidden_mapping.weight": "pytorch_model-00001-of-00004.bin",
"albert.embeddings.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.embeddings.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.embeddings.token_type_embeddings.weight": "pytorch_model-00001-of-00004.bin",
"albert.embeddings.word_embeddings.weight": "pytorch_model-00001-of-00004.bin",
"albert.pooler.bias": "pytorch_model-00004-of-00004.bin",
"albert.pooler.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.0.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.1.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.10.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.11.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.12.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.13.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.14.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.15.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.16.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.17.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.18.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.19.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.2.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.20.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.21.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.22.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.23.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.24.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.25.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.26.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.ffn.dense_h2o.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.ffn.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.27.layers.0.ffn.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.attention.dense_out.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.attention.dense_qkv.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.attention.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.attention.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.ffn.dense_h2o.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.ffn.dense_i2h.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.ffn.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.28.layers.0.ffn.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.attention.dense_out.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.attention.dense_qkv.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.attention.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.attention.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.ffn.dense_h2o.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.ffn.dense_i2h.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.ffn.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.29.layers.0.ffn.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.3.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.attention.dense_out.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.attention.dense_qkv.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.attention.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.attention.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.ffn.dense_h2o.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.ffn.dense_i2h.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.ffn.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.30.layers.0.ffn.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.attention.dense_out.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.attention.dense_qkv.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.attention.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.attention.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.ffn.dense_h2o.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.ffn.dense_i2h.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.ffn.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.31.layers.0.ffn.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.4.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.5.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.6.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.7.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.8.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
"albert.transformer.layer_groups.9.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
"albert.transformer.post_layer_norm.bias": "pytorch_model-00004-of-00004.bin",
"albert.transformer.post_layer_norm.weight": "pytorch_model-00004-of-00004.bin",
"predictions.bias": "pytorch_model-00004-of-00004.bin",
"predictions.decoder.bias": "pytorch_model-00004-of-00004.bin",
"predictions.decoder.weight": "pytorch_model-00004-of-00004.bin",
"predictions.dense.bias": "pytorch_model-00004-of-00004.bin",
"predictions.dense.weight": "pytorch_model-00004-of-00004.bin",
"predictions.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
"predictions.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
"sop_classifier.classifier.bias": "pytorch_model-00004-of-00004.bin",
"sop_classifier.classifier.weight": "pytorch_model-00004-of-00004.bin"
}
}