#!/bin/bash
#
# Launches ./run_t5_mlm_flax.py with the "t5-small" config on the "mc4"
# dataset ("bn" dataset config). Checkpoints are written under
# ${MODEL_DIR}/checkpoints and --push_to_hub is passed to the script.
#
# The training script, MODEL_DIR and TOKENIZER_DIR are all referenced by
# relative path, so run this from the directory that contains them.

set -euo pipefail

# Exported so the child process inherits it; a plain assignment would stay
# local to this shell. Presumably read by the Hugging Face `datasets`
# library as a size limit in bytes -- TODO confirm unit.
export HF_DATASETS_IN_MEMORY_MAX_SIZE=200000000

# These are standalone assignments (not a `VAR=... cmd` prefix) on purpose:
# the shell expands "${MODEL_DIR}" / "${TOKENIZER_DIR}" in the arguments
# below before any prefix assignment on the same command would take effect.
MODEL_DIR=.
TOKENIZER_DIR=tokenizer

# NOTE(review): logging/save/eval steps are all "1", i.e. every step, and
# --push_to_hub is set. That looks like a smoke-test configuration --
# confirm before using it for a full 10-epoch run.
./run_t5_mlm_flax.py \
  --output_dir="${MODEL_DIR}/checkpoints" \
  --model_type="t5" \
  --config_name="t5-small" \
  --tokenizer_name="${TOKENIZER_DIR}" \
  --dataset_name="mc4" \
  --dataset_config_name="bn" \
  --max_seq_length="512" \
  --per_device_train_batch_size="128" \
  --per_device_eval_batch_size="128" \
  --learning_rate="1e-3" \
  --weight_decay="0.001" \
  --warmup_steps="500" \
  --overwrite_output_dir \
  --num_train_epochs="10" \
  --logging_steps="1" \
  --save_steps="1" \
  --eval_steps="1" \
  --push_to_hub