checkpointing_steps: '1000'
config_name: null
context_size: 4
context_word_dropout: 0.1
dataset_config_name: sentences
dataset_name: inseq/scat
gradient_accumulation_steps: 1
hub_model_id: context-mt/scat-marian-small-ctx4-cwd1-en-fr
hub_token: null
ignore_pad_token_for_loss: true
learning_rate: 5.0e-05
logging_steps: '100'
lr_scheduler_type: linear
max_length: 128
max_source_length: 512
max_target_length: 128
max_train_steps: 3470
model_name_or_path: context-mt/iwslt17-marian-small-ctx4-cwd1-en-fr
model_type: null
num_beams: 5
num_train_epochs: 5
num_warmup_steps: 0
output_dir: /scratch/p305238/scat-marian-small-ctx4-cwd1-en-fr
overwrite_cache: false
pad_to_max_length: true
per_device_eval_batch_size: 8
per_device_train_batch_size: 16
predict_with_generate: true
preprocessing_num_workers: null
push_to_hub: true
report_to: tensorboard
resume_from_checkpoint: null
sample_context: true
seed: null
source_lang: en_XX
target_lang: fr_XX
tokenizer_name: null
train_file: null
use_slow_tokenizer: false
val_max_target_length: null
validation_file: null
weight_decay: 0.0
with_tracking: true
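For reference, below is a minimal sketch of how the checkpoint pushed by this run (`hub_model_id: context-mt/scat-marian-small-ctx4-cwd1-en-fr`) could be loaded and used for context-aware English-to-French translation with the decoding settings from the config above (`num_beams: 5`, `max_length: 128`). It assumes the standard `transformers` seq2seq API; the `<brk>` separator between context sentences and the current sentence is an assumption about how inputs were formatted and may differ from the actual preprocessing.

```python
# Minimal sketch: load the fine-tuned checkpoint from the Hub and translate
# with the decoding settings listed in the config (beam search, max_length).
# NOTE: the "<brk>"-separated context format below is an assumption,
# not taken from the config itself.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_id = "context-mt/scat-marian-small-ctx4-cwd1-en-fr"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Up to context_size (4) preceding source sentences, then the sentence to translate.
context = "The window was left open all night."
current = "It was freezing in the morning."
source = f"{context} <brk> {current}"  # assumed context/current separator

inputs = tokenizer(source, return_tensors="pt", truncation=True, max_length=512)
outputs = model.generate(**inputs, num_beams=5, max_length=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```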