# Launch script: fine-tunes ${MODEL_NAME_OR_PATH} for ${SOURCE_LANG} -> ${TARGET_LANG}
# translation by running ../run_s2s_flax_pmap_multiseq.py.
#
# Required environment:
#   DATASET  value passed to --dataset_name_list (it is not set in this file).
#
# The training script is referenced by a relative path, so this launcher must
# be started from the directory that has run_s2s_flax_pmap_multiseq.py in its
# parent directory.

# Use 80% of the logical CPUs (rounded down) for preprocessing workers, but
# never fewer than one: plain truncation yields 0 on a single-core machine.
# If /proc/cpuinfo is unreadable or reports no processors, fall back to 1.
total_cores=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null) || total_cores=1
CORES=$(( total_cores * 8 / 10 ))
[ "${CORES}" -ge 1 ] || CORES=1
export CORES

#export XLA_PYTHON_CLIENT_PREALLOCATE=false
export SOURCE_LANG="en"
export TARGET_LANG="nl"
export HF_PROJECT="ul2-base-nl36-neddx2-en-nl"

# DATASET used to be assigned here; the assignment is disabled, so the caller
# has to provide it. Abort early instead of letting --dataset_name_list
# silently swallow the next option as its value.
# export DATASET="/home/yeb/data/nedd_x_dataset/nedd_x_dataset.py"
: "${DATASET:?DATASET must be set to the dataset name or loading-script path}"

#export DATASET_CONFIG="dict"
export DATASET_CONFIG="voc8k_beta_3buf"
export MODEL_NAME_OR_PATH="yhavinga/ul2-base-nl36-dutch"
export TOKENIZER_NAME="yhavinga/ul2-base-nl36-dutch"
export MODEL_PATH="${HOME}/data/${HF_PROJECT}" # Path to the model
export HF_DATASETS_CACHE=/mnt/ramdisk

# NOTE(review): author's reference table, meaning not documented here --
# presumably length figures per id filter (rows) and vocabulary (columns).
#      52k   8k   32ksp
# l    472   500
# b0   328   352
# b1   472   480  370
# b2   1920  1984

# Without the output directory the run cannot save anything; stop right away.
mkdir -p -- "${MODEL_PATH}" || exit 1

# ${DATASET} is intentionally left unquoted: it feeds a "_list" flag, so a
# space-separated value is split into several arguments.
# NOTE(review): confirm that splitting is wanted; the other list flags are
# passed a single quoted value.
# NOTE(review): both --max_eval_samples_list "2000" and --max_eval_samples 512
# are passed; which one takes effect depends on the training script.
# shellcheck disable=SC2086
python ../run_s2s_flax_pmap_multiseq.py \
  --output_dir="${MODEL_PATH}" \
  --model_name_or_path "${MODEL_NAME_OR_PATH}" \
  --tokenizer_name "${TOKENIZER_NAME}" \
  --use_fast_tokenizer="False" \
  --use_auth_token="True" \
  --dataset_name_list ${DATASET} \
  --dataset_config_name_list "${DATASET_CONFIG}" \
  --id_filter_list "-b2-" \
  --max_train_samples_list "0" \
  --max_eval_samples_list "2000" \
  --max_predict_samples_list "128" \
  --preprocessing_num_workers="${CORES}" \
  --source_lang="${SOURCE_LANG}" \
  --target_lang="${TARGET_LANG}" \
  --metric_name="sacrebleu" \
  --do_train --do_eval --do_predict \
  --predict_with_generate \
  --learning_rate="0.0009" \
  --adam_beta1="0.9" \
  --adam_beta2="0.9969" \
  --adam_epsilon="1e-8" \
  --weight_decay="0.001" \
  --label_smoothing_factor="0.11" \
  --length_penalty="1.3" \
  --warmup_steps 500 \
  --dropout_rate="0.01" \
  --dtype "bfloat16" \
  --z_loss "1e-4" \
  --dynamic_loss_scaling="False" \
  --per_device_train_batch_size 4 \
  --per_device_eval_batch_size 4 \
  --gradient_accumulation_steps 16 \
  --overwrite_output_dir \
  --max_source_length_list 370 \
  --max_target_length_list 370 \
  --num_beams 5 \
  --logging_steps 5 \
  --save_steps 800 \
  --eval_steps 800 \
  --num_train_epochs 2 \
  --max_eval_samples 512 \
  --validation_split_count 2000 \
  --wandb_project="${HF_PROJECT}" \
  --wandb_job_type="pmap"

# Optional flags kept for reference (append to the command above to use):
#  --resume_from_checkpoint="${MODEL_PATH}" \
#  --max_train_samples="1_064_886" \
#  --max_eval_samples 256 \
#  --max_predict_samples 256 \