{ "architectures": [ "LeanAlbertForPretraining", "LeanAlbertForTokenClassification", "LeanAlbertForSequenceClassification" ], "model_type": "lean_albert", "num_hidden_layers": 32, "num_hidden_groups": 32, "num_inner_groups": 1, "share_large_matrices": true, "adapter_dim": 32, "hidden_size": 2560, "intermediate_size": 10240, "embedding_size": 256, "num_attention_heads": 64, "vocab_size": 999, "hidden_act": "gelu_new", "hidden_act_gated": true, "sandwich_norm": true, "inner_group_num": 1, "position_embedding_type": "rotary", "hidden_dropout_prob": 0, "classifier_dropout_prob": 0.1, "attention_probs_dropout_prob": 0, "layer_norm_eps": 1e-12, "type_vocab_size": 2, "pad_token_id": 0, "bos_token_id": 2, "eos_token_id": 3 }