#!/bin/bash
# bengali-t5-base / run.sh
# Every artifact of this run (cache, checkpoints) lives under one experiment folder.
EXP_FOLDER="dumped/bengali_t5_base"
CACHE_DIR="$EXP_FOLDER/"   # reserved for caching; not passed to the training script below
MODEL_CKPT="$EXP_FOLDER/"  # checkpoints are written directly into the experiment folder
mkdir -p "$EXP_FOLDER" "$CACHE_DIR" "$MODEL_CKPT"
TOKENIZER_DIR="dumped/bengali_t5_base/tokenizer"  # must already hold a trained tokenizer (see note at the end)
MODEL_CONFIG="t5-base"                            # architecture config only; weights are initialized from scratch
MAX_SEQ_LEN=512
NUM_THREAD=50                                     # dataset preprocessing workers
DATASET_NAME="mc4"
DATASET_CONFIG_NAME="bn"
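
# Pre-train T5-base from scratch (random initialization from the t5-base config)
# on the Bengali split of mC4 using Hugging Face's Flax MLM example script,
# which implements T5's span-corruption objective.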
python -u run_t5_mlm_flax.py \
--output_dir "${MODEL_CKPT}" \
--model_type "t5" \
--config_name "${MODEL_CONFIG}" \
--tokenizer_name "${TOKENIZER_DIR}" \
--dataset_name "${DATASET_NAME}" \
--dataset_config_name "${DATASET_CONFIG_NAME}" \
--max_seq_length "${MAX_SEQ_LEN}" \
--per_device_train_batch_size 8 \
--per_device_eval_batch_size 8 \
--adafactor \
--learning_rate 1e-3 \
--weight_decay 0.001 \
--warmup_steps 5000 \
--overwrite_output_dir \
--num_train_epochs 10 \
--logging_steps 500 \
--save_steps 2500 \
--eval_steps 7500 \
--preprocessing_num_workers "${NUM_THREAD}" \
--dtype bfloat16
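
# ---------------------------------------------------------------------------
# Prerequisite: ${TOKENIZER_DIR} must already contain a trained tokenizer,
# since --tokenizer_name points at a local path that this script never
# creates. The commented sketch below is a hypothetical illustration of that
# step, not this repo's own tokenizer-training code: it assumes the `datasets`
# and `tokenizers` libraries, streams the same mC4 Bengali split, and uses an
# arbitrary 32k vocab. T5-specific details (the <extra_id_*> sentinel tokens
# and the config/tokenizer_config metadata that lets AutoTokenizer load the
# folder) are left out for brevity.
#
# python - <<'PY'
# from pathlib import Path
# from datasets import load_dataset
# from tokenizers import SentencePieceUnigramTokenizer
#
# # Make sure the target folder exists before saving.
# Path("dumped/bengali_t5_base/tokenizer").mkdir(parents=True, exist_ok=True)
#
# # Stream mC4 (Bengali) so the corpus never has to fit in memory or on disk.
# dataset = load_dataset("mc4", "bn", split="train", streaming=True)
#
# def batch_iterator(batch_size=1000):
#     batch = []
#     for example in dataset:
#         batch.append(example["text"])
#         if len(batch) == batch_size:
#             yield batch
#             batch = []
#     if batch:
#         yield batch
#
# # Train a SentencePiece unigram tokenizer, the vocabulary type T5 uses.
# tokenizer = SentencePieceUnigramTokenizer()
# tokenizer.train_from_iterator(
#     batch_iterator(),
#     vocab_size=32000,                          # assumption, not taken from this repo
#     special_tokens=["<pad>", "</s>", "<unk>"],
#     unk_token="<unk>",
# )
# tokenizer.save("dumped/bengali_t5_base/tokenizer/tokenizer.json")
# PY
# ---------------------------------------------------------------------------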