attention_dropout: 0.0
bos_token_id: 12
eos_token_id: 13
hidden_size: 768
intermediate_size: 768
learning_rate: 0.0001
max_epochs: 7
max_position_embeddings: 512
num_attention_heads: 8
num_hidden_layers: 7
num_labels: 105
pad_token_id: 0
steps_per_epoch: 375000
vocab_size: 591
warmup_epochs: 1
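
These fields describe a small transformer classifier (7 layers, hidden size 768, 8 attention heads, 105 output labels) together with its training schedule. As a minimal sketch of how the schedule-related fields combine, the Python snippet below derives the warmup and total step counts from `warmup_epochs`, `max_epochs`, and `steps_per_epoch`. The `TrainingConfig` dataclass and the `lr_at_step` helper are hypothetical names introduced here for illustration, and holding the learning rate constant after warmup is an assumption, since the config does not name a decay policy.

```python
from dataclasses import dataclass

@dataclass
class TrainingConfig:
    # Values copied from the config above.
    attention_dropout: float = 0.0
    bos_token_id: int = 12
    eos_token_id: int = 13
    hidden_size: int = 768
    intermediate_size: int = 768
    learning_rate: float = 1e-4
    max_epochs: int = 7
    max_position_embeddings: int = 512
    num_attention_heads: int = 8
    num_hidden_layers: int = 7
    num_labels: int = 105
    pad_token_id: int = 0
    steps_per_epoch: int = 375_000
    vocab_size: int = 591
    warmup_epochs: int = 1


def lr_at_step(cfg: TrainingConfig, step: int) -> float:
    """Linear warmup to the peak learning rate, then constant.

    Warmup spans warmup_epochs * steps_per_epoch optimizer steps
    (375_000 with the values above). Keeping the rate constant
    afterwards is an assumption; the config specifies no decay.
    """
    warmup_steps = cfg.warmup_epochs * cfg.steps_per_epoch
    if step < warmup_steps:
        return cfg.learning_rate * step / warmup_steps
    return cfg.learning_rate


cfg = TrainingConfig()
total_steps = cfg.max_epochs * cfg.steps_per_epoch  # 7 * 375_000 = 2_625_000
print(lr_at_step(cfg, 187_500))  # halfway through warmup -> 5e-05
```

Note that `steps_per_epoch: 375000` with `max_epochs: 7` implies roughly 2.6M optimizer steps in total, of which the first epoch (375k steps) is warmup.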