muril-bigbird-base / run_muril_bb_flax.sh
#!/usr/bin/env bash
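# Resume masked-language-model pretraining of the local muril-bigbird model
# from checkpoint_100000 using the custom Flax script run_muril_bb_flax_mlm.py.
# The data under /dev/shm/data-files is already pretokenized; sequences are
# padded to the full 4096-token length and training runs in bfloat16.
# Evaluation runs every 5,000 steps, checkpoints are saved every 25,000 steps
# (keeping at most one, per --save_total_limit), and the result is pushed to
# the Hub under the run name "en-ta-hi-te-from-150k".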
python run_muril_bb_flax_mlm.py \
    --model_name_or_path muril-bigbird \
    --tokenizer_name muril-bigbird \
    --dtype bfloat16 \
    --pretokenized \
    --resume_from_checkpoint "checkpoint_100000" \
    --train_file "/dev/shm/data-files" \
    --validation_file "." \
    --max_seq_length 4096 \
    --pad_to_max_length \
    --output_dir ./ \
    --overwrite_output_dir \
    --do_train \
    --do_eval \
    --eval_steps 5000 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 2 \
    --learning_rate 5e-5 \
    --weight_decay 0.01 \
    --num_train_epochs 3 \
    --warmup_steps 0 \
    --logging_steps 1000 \
    --save_steps 25000 \
    --save_total_limit 1 \
    --seed 18 \
    --push_to_hub \
    --run_name "en-ta-hi-te-from-150k"