---
# Model configuration (config_type: model) for the mask_filling task.
# One key per line: a plain YAML scalar cannot contain ": ", so these pairs
# must not be joined onto a single line.

name: distilbert_mask_filling
config_type: model
task: mask_filling

# Hyperparameters. Key names are kept exactly as the consumer expects them
# (including the trailing underscore in `tie_weights_`).
# NOTE(review): meanings are inferred from the key names only — confirm
# against the code that loads this file.
activation: gelu
attention_dropout: 0.1
dim: 768
dropout: 0.1
hidden_dim: 3072  # 4 * dim
initializer_range: 0.02
max_position_embeddings: 512
n_heads: 12  # dim / n_heads = 64
n_layers: 6
output_past: true
pad_token_id: 0
qa_dropout: 0.1
tie_weights_: true
# NOTE(review): presumably must match the tokenizer's vocabulary size — verify.
vocab_size: 42000