---
# Training configuration: context-aware Marian "big" en->fr model
# (context size 4, context word dropout 0.1), fine-tuned on the
# inseq/scat dataset from an IWSLT17-pretrained checkpoint, with
# results pushed to the Hub as context-mt/scat-marian-big-ctx4-cwd1-en-fr.

# Kept as a quoted string, not an int — NOTE(review): the consuming
# script presumably also accepts non-numeric values here (e.g. 'epoch');
# confirm before changing the type.
checkpointing_steps: '1000'
config_name: null
context_size: 4
context_word_dropout: 0.1
dataset_config_name: sentences
dataset_name: inseq/scat
gradient_accumulation_steps: 2
hub_model_id: context-mt/scat-marian-big-ctx4-cwd1-en-fr
# SECURITY: a live-format Hugging Face access token (hf_...) was committed
# in this field. Revoke that token immediately, then supply credentials via
# the HF_TOKEN environment variable or `huggingface-cli login` instead of
# storing them in version-controlled config.
hub_token: null
ignore_pad_token_for_loss: true
learning_rate: 5.0e-05
# String for the same reason as checkpointing_steps above.
logging_steps: '200'
lr_scheduler_type: linear
max_length: 128
max_source_length: 512
max_target_length: 128
max_train_steps: 1388
model_name_or_path: context-mt/iwslt17-marian-big-ctx4-cwd1-en-fr
model_type: null
num_beams: 5
num_train_epochs: 2
num_warmup_steps: 0
output_dir: /scratch/p305238/scat-marian-big-ctx4-cwd1-en-fr
overwrite_cache: false
pad_to_max_length: true
per_device_eval_batch_size: 8
per_device_train_batch_size: 8
predict_with_generate: true
preprocessing_num_workers: null
push_to_hub: true
report_to: tensorboard
resume_from_checkpoint: null
sample_context: true
seed: null
source_lang: en_XX
target_lang: fr_XX
tokenizer_name: null
train_file: null
use_slow_tokenizer: false
val_max_target_length: null
validation_file: null
weight_decay: 0.0
with_tracking: true