| Parameter | Value |
|---|---|
| attention_dropout | 0.0 |
| bos_token_id | 12 |
| eos_token_id | 13 |
| hidden_size | 768 |
| intermediate_size | 768 |
| learning_rate | 0.0001 |
| max_epochs | 7 |
| max_position_embeddings | 512 |
| num_attention_heads | 8 |
| num_hidden_layers | 7 |
| num_labels | 105 |
| pad_token_id | 0 |
| steps_per_epoch | 375000 |
| vocab_size | 591 |
| warmup_epochs | 1 |
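For reference, these values map directly onto a transformer-encoder configuration. Below is a minimal Python sketch that collects the hyperparameters into a dict and derives the warmup and total step counts from `warmup_epochs`, `max_epochs`, and `steps_per_epoch`. The linear-warmup schedule in `lr_at_step` is an assumption for illustration; the table does not specify how the learning rate behaves after warmup.

```python
# Hyperparameters transcribed from the table above.
CONFIG = {
    "attention_dropout": 0.0,
    "bos_token_id": 12,
    "eos_token_id": 13,
    "hidden_size": 768,
    "intermediate_size": 768,
    "learning_rate": 1e-4,
    "max_epochs": 7,
    "max_position_embeddings": 512,
    "num_attention_heads": 8,
    "num_hidden_layers": 7,
    "num_labels": 105,
    "pad_token_id": 0,
    "steps_per_epoch": 375_000,
    "vocab_size": 591,
    "warmup_epochs": 1,
}


def lr_at_step(step: int, cfg: dict = CONFIG) -> float:
    """Learning rate at a given optimizer step.

    Assumption: the rate ramps linearly up to `learning_rate` over the
    warmup epoch and stays constant afterwards; the table does not state
    the post-warmup decay behavior.
    """
    warmup_steps = cfg["warmup_epochs"] * cfg["steps_per_epoch"]  # 375,000
    if step < warmup_steps:
        return cfg["learning_rate"] * (step + 1) / warmup_steps
    return cfg["learning_rate"]


# Total optimizer steps implied by the table: 7 * 375,000 = 2,625,000.
total_steps = CONFIG["max_epochs"] * CONFIG["steps_per_epoch"]
```

Note that intermediate_size equals hidden_size (768) here, rather than the conventional 4x expansion in the feed-forward block, and the vocabulary is small (591 tokens), which together suggest a compact, domain-specific model rather than a general-purpose language model.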