model-ya / pytorch_model.bin.index.json

Upload LeanAlbertForPreTraining

bb12a3e almost 2 years ago

No virus

45.6 kB

	{
	"metadata": {
	"total_size": 34491661488
	},
	"weight_map": {
	"albert.embeddings.embedding_hidden_mapping.bias": "pytorch_model-00001-of-00004.bin",
	"albert.embeddings.embedding_hidden_mapping.weight": "pytorch_model-00001-of-00004.bin",
	"albert.embeddings.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.embeddings.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.embeddings.token_type_embeddings.weight": "pytorch_model-00001-of-00004.bin",
	"albert.embeddings.word_embeddings.weight": "pytorch_model-00001-of-00004.bin",
	"albert.pooler.bias": "pytorch_model-00004-of-00004.bin",
	"albert.pooler.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.0.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.1.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.10.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.11.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.12.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.13.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.14.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.15.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.16.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.17.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.attention.dense_out.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.attention.dense_qkv.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.attention.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.attention.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.ffn.dense_i2h.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.18.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.19.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.2.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.20.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.21.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.22.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.23.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.24.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.25.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.ffn.dense_h2o.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.ffn.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.26.layers.0.ffn.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.attention.dense_out.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.attention.dense_qkv.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.attention.layer_norm.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.attention.layer_norm.weight": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.ffn.dense_h2o.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.ffn.dense_i2h.bias": "pytorch_model-00003-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.ffn.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.27.layers.0.ffn.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.attention.dense_out.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.attention.dense_qkv.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.attention.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.attention.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.ffn.dense_h2o.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.ffn.dense_i2h.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.ffn.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.28.layers.0.ffn.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.attention.dense_out.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.attention.dense_qkv.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.attention.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.attention.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.ffn.dense_h2o.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.ffn.dense_i2h.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.ffn.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.29.layers.0.ffn.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.3.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.attention.dense_out.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.attention.dense_qkv.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.attention.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.attention.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.ffn.dense_h2o.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.ffn.dense_i2h.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.ffn.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.30.layers.0.ffn.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.attention.dense_out.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.attention.dense_qkv.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.attention.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.attention.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.ffn.dense_h2o.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.ffn.dense_i2h.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.ffn.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.31.layers.0.ffn.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.4.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.5.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.6.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.7.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.ffn.dense_h2o.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.ffn.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.8.layers.0.ffn.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.attention.dense_out.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.attention.dense_out.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.attention.dense_qkv.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.attention.dense_qkv.shared_matrix.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.attention.layer_norm.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.attention.layer_norm.weight": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.ffn.dense_h2o.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.ffn.dense_h2o.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.ffn.dense_i2h.bias": "pytorch_model-00001-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.ffn.dense_i2h.shared_matrix.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.ffn.layer_norm.bias": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.layer_groups.9.layers.0.ffn.layer_norm.weight": "pytorch_model-00002-of-00004.bin",
	"albert.transformer.post_layer_norm.bias": "pytorch_model-00004-of-00004.bin",
	"albert.transformer.post_layer_norm.weight": "pytorch_model-00004-of-00004.bin",
	"predictions.bias": "pytorch_model-00004-of-00004.bin",
	"predictions.decoder.bias": "pytorch_model-00004-of-00004.bin",
	"predictions.decoder.weight": "pytorch_model-00004-of-00004.bin",
	"predictions.dense.bias": "pytorch_model-00004-of-00004.bin",
	"predictions.dense.weight": "pytorch_model-00004-of-00004.bin",
	"predictions.layer_norm.bias": "pytorch_model-00004-of-00004.bin",
	"predictions.layer_norm.weight": "pytorch_model-00004-of-00004.bin",
	"sop_classifier.classifier.bias": "pytorch_model-00004-of-00004.bin",
	"sop_classifier.classifier.weight": "pytorch_model-00004-of-00004.bin"
	}
	}