# Launch Flax MLM pre-training of a MuRIL-initialized BigBird model,
# resuming from step 100k. Data is pre-tokenized and read from /dev/shm
# for speed; checkpoints/results are pushed to the Hugging Face Hub.
#
# NOTE(review): the original file had " | |" residue appended to every
# line (markdown-table extraction artifact), which broke both the
# backslash line-continuations and the shell syntax. Removed here; the
# command and its arguments are otherwise unchanged.
python run_muril_bb_flax_mlm.py \
  --model_name_or_path muril-bigbird \
  --tokenizer_name muril-bigbird \
  --dtype bfloat16 \
  --pretokenized \
  --resume_from_checkpoint "checkpoint_100000" \
  --train_file "/dev/shm/data-files" \
  --validation_file "." \
  --max_seq_length 4096 \
  --pad_to_max_length \
  --output_dir ./ \
  --overwrite_output_dir \
  --do_train \
  --do_eval \
  --eval_steps 5000 \
  --per_device_train_batch_size 1 \
  --per_device_eval_batch_size 2 \
  --learning_rate 5e-5 \
  --weight_decay 0.01 \
  --num_train_epochs 3 \
  --warmup_steps 0 \
  --logging_steps 1000 \
  --save_steps 25000 \
  --save_total_limit 1 \
  --seed 18 \
  --push_to_hub \
  --run_name "en-ta-hi-te-from-150k"