# Training configuration: transformer encoder/decoder, dataset and model paths under exp/.
# NOTE(review): option names look like OpenNMT-py train options -- confirm against the
# tool that consumes this file before relying on the section notes below.

# --- Paths, checkpointing and schedule ---
data: exp/dataset.de-en
save_model: exp/model.de-en
save_checkpoint_steps: 10000
keep_checkpoint: 10
seed: 3435
train_steps: 500000
valid_steps: 10000
warmup_steps: 8000
report_every: 100

# --- Model architecture ---
decoder_type: transformer
encoder_type: transformer
word_vec_size: 512
rnn_size: 512
layers: 6
transformer_ff: 2048
heads: 8

# --- Optimizer and learning-rate schedule ---
accum_count: 8
optim: adam
adam_beta1: 0.9
adam_beta2: 0.998
decay_method: noam
learning_rate: 2.0
max_grad_norm: 0.0

# --- Batching and regularization ---
# batch_type and normalization are both "tokens", so batch_size is presumably a
# token count rather than a sentence count -- TODO confirm.
batch_size: 4096
batch_type: tokens
normalization: tokens
dropout: 0.1
label_smoothing: 0.1

max_generator_batches: 2

# --- Parameter initialization ---
param_init: 0.0
# The two flags below are deliberately kept as quoted strings ('true'), exactly as in
# the original file; do not unquote without checking how the consumer parses booleans.
param_init_glorot: 'true'
position_encoding: 'true'

# --- Devices ---
world_size: 1
gpu_ranks:
- 0