---
# Training run configuration: flat mapping of option name -> value.
# Keys are kept in alphabetical order; `null` marks an option left unset.

# Quoted on purpose (string, not int), as in the original file.
# NOTE(review): presumably the consumer accepts a string here - confirm.
checkpointing_steps: '15000'
config_name: null
context_size: 4
context_word_dropout: 0.1
dataset_config_name: iwslt2017-en-fr
dataset_name: gsarti/iwslt2017_context
gradient_accumulation_steps: 2
hub_model_id: context-mt/iwslt17-mbart50-1toM-ctx4-cwd1-en-fr
# SECURITY: a literal access token was previously stored in this field.
# Treat that token as compromised and revoke it. Do not commit credentials;
# supply them at runtime instead (e.g. an environment variable or a cached
# login). NOTE(review): confirm the consuming script accepts null here.
hub_token: null
ignore_pad_token_for_loss: true
learning_rate: 5.0e-05
# Quoted on purpose (string, not int), as in the original file.
logging_steps: '600'
lr_scheduler_type: linear
max_length: 128
max_source_length: 512
max_target_length: 128
max_train_steps: 145520
model_name_or_path: facebook/mbart-large-50-one-to-many-mmt
model_type: null
num_beams: 5
num_train_epochs: 5
num_warmup_steps: 1000
# Absolute, machine-specific path; adjust when running elsewhere.
output_dir: /scratch/p305238/iwslt17-mbart50-1toM-ctx4-cwd1-en-fr
overwrite_cache: false
pad_to_max_length: true
per_device_eval_batch_size: 8
per_device_train_batch_size: 4
predict_with_generate: true
preprocessing_num_workers: null
push_to_hub: true
report_to: tensorboard
resume_from_checkpoint: null
sample_context: true
# NOTE(review): seed is unset, so runs are presumably not reproducible - confirm.
seed: null
source_lang: en_XX
target_lang: fr_XX
tokenizer_name: null
train_file: null
use_slow_tokenizer: false
val_max_target_length: null
validation_file: null
weight_decay: 0.0
with_tracking: true