export CORES=`grep -c ^processor /proc/cpuinfo`
#export XLA_PYTHON_CLIENT_PREALLOCATE=false
export HF_PROJECT="byt5-small-ccmatrix-en-nl"
#
export SOURCE_LANG="en"
export TARGET_LANG="nl"
export DATASET="yhavinga/ccmatrix"
export DATASET_CONFIG="en-nl-25_000_000"
export MODEL_NAME_OR_PATH="yhavinga/byt5-small-dutch-english"
export MODEL_PATH="${HOME}/data/${HF_PROJECT}"  # Path to the model output directory
export HF_DATASETS_CACHE=/mnt/ramdisk

mkdir -p "${MODEL_PATH}"

python ../run_s2s_flax_pmap.py \
    --output_dir="${MODEL_PATH}" \
    --model_name_or_path ${MODEL_NAME_OR_PATH} \
    --dataset_name ${DATASET} \
    --dataset_config_name ${DATASET_CONFIG} \
    --preprocessing_num_workers="${CORES}" \
    --source_lang="${SOURCE_LANG}" \
    --target_lang="${TARGET_LANG}" \
    --metric_name="sacrebleu" \
    --do_train --do_eval --do_predict \
    --predict_with_generate \
    --learning_rate="0.0008475" \
    --adam_beta1="0.9" \
    --adam_beta2="0.9969" \
    --adam_epsilon="1e-8" \
    --weight_decay="0.03458" \
    --label_smoothing_factor="0.15" \
    --length_penalty="1.3" \
    --warmup_steps 500 \
    --dropout_rate="0.1" \
    --dtype="bfloat16" \
    --z_loss="1e-4" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 32 \
    --gradient_accumulation_steps 4 \
    --overwrite_output_dir \
    --max_source_length 352 --max_target_length 352 \
    --num_beams 5 \
    --logging_steps 10 \
    --save_steps 1000 \
    --eval_steps 1000 \
    --num_train_epochs 1 \
    --max_eval_samples 2000 \
    --max_predict_samples 2000 \
    --validation_split_count 2000 \
    --wandb_project="byt5-small-ccmatrix-en-nl" \
    --wandb_job_type="pmap"
    # --max_train_samples="1_064_886" \
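
# Two follow-up notes (assumptions about the setup, not part of the original recipe):
#
# 1. Effective batch size: with pmap each of the N local accelerators processes
#    per_device_train_batch_size examples per step, so one optimizer update covers
#    32 x 4 (gradient accumulation) x N examples, e.g. 1024 examples on an 8-device host.
#
# 2. Optional smoke test of the trained checkpoint. A minimal sketch that assumes
#    run_s2s_flax_pmap.py saves a transformers-compatible Flax model and tokenizer into
#    ${MODEL_PATH}; the English test sentence and generation flags are illustrative only.
#
# python -c "
# from transformers import AutoTokenizer, FlaxAutoModelForSeq2SeqLM
# tok = AutoTokenizer.from_pretrained('${MODEL_PATH}')
# model = FlaxAutoModelForSeq2SeqLM.from_pretrained('${MODEL_PATH}')
# batch = tok(['The weather is nice today.'], return_tensors='np')
# out = model.generate(**batch, num_beams=5, max_length=352, length_penalty=1.3)
# print(tok.batch_decode(out.sequences, skip_special_tokens=True))
# "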