checkpointing_steps: '15000'
config_name: null
context_size: 4
context_word_dropout: 0.0
dataset_config_name: iwslt2017-en-fr
dataset_name: gsarti/iwslt2017_context
gradient_accumulation_steps: 1
hub_model_id: context-mt/iwslt17-marian-small-target-ctx4-cwd0-en-fr
hub_token: <redacted>
ignore_pad_token_for_loss: true
learning_rate: 5.0e-05
logging_steps: '600'
lr_scheduler_type: linear
max_length: 128
max_source_length: 512
max_target_length: 512
max_train_steps: 72760
model_name_or_path: Helsinki-NLP/opus-mt-en-fr
model_type: null
num_beams: 5
num_train_epochs: 5
num_warmup_steps: 1000
output_dir: /scratch/p305238/iwslt17-marian-small-target-ctx4-cwd0-en-fr
overwrite_cache: false
pad_to_max_length: true
per_device_eval_batch_size: 8
per_device_train_batch_size: 16
predict_with_generate: true
preprocessing_num_workers: null
push_to_hub: true
report_to: tensorboard
resume_from_checkpoint: null
sample_context: true
seed: null
source_lang: en_XX
target_lang: fr_XX
tokenizer_name: null
train_file: null
use_slow_tokenizer: false
use_target_context: true
val_max_target_length: null
validation_file: null
weight_decay: 0.0
with_tracking: true
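
As a minimal usage sketch, the fine-tuned checkpoint named in `hub_model_id` can be loaded with the standard `transformers` seq2seq API. The example source sentence below is made up, and this config does not specify how the four context sentences are concatenated to the input at inference time, so only a plain, context-free call is shown; the generation settings mirror `num_beams: 5` and `max_length: 128` from the configuration above.

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint id taken from hub_model_id in the config above.
model_id = "context-mt/iwslt17-marian-small-target-ctx4-cwd0-en-fr"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Hypothetical English source sentence (not from the IWSLT17 data).
src = "I saw her yesterday, and she seemed happy."

# max_source_length: 512 in the config; truncate accordingly.
inputs = tokenizer(src, return_tensors="pt", truncation=True, max_length=512)

# Beam search with 5 beams, outputs capped at 128 tokens, as configured.
outputs = model.generate(**inputs, num_beams=5, max_length=128)

print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
```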