#!/usr/bin/env bash
# Launch pretraining via run_dnlm_flax.py (Flax/JAX denoising LM).
#
# Fixes vs. the previous version:
#   - removed the stray trailing "|" on every line: after "\", the text "\ |"
#     escaped the space and then *piped* into the next line, breaking the command;
#   - removed the blank lines between continuations: a "\"-newline joined into an
#     empty line, terminating the command before any option was passed;
#   - removed the dangling "\" after the last option.
#
# NOTE(review): assumes run_dnlm_flax.py, the "rotobart" config, and
# vocab-2/the_pile.model exist relative to the working directory — confirm.
# NOTE(review): --auth_token True presumably enables HF Hub authentication for
# the NbAiLab/NCC2 dataset — verify the flag's expected value format.
set -euo pipefail

python3 run_dnlm_flax.py \
  --output_dir "." \
  --overwrite_output_dir \
  --dataset_path NbAiLab/NCC2 \
  --config_name rotobart \
  --tokenizer_name vocab-2/the_pile.model \
  --shuffle_buffer_size 100_000 \
  --do_train --do_eval \
  --max_seq_length 1024 \
  --encoder_layers 12 \
  --decoder_layers 12 \
  --per_device_train_batch_size 1 \
  --per_device_eval_batch_size 1 \
  --logging_steps 8 \
  --num_train_steps 100000 \
  --eval_steps 10000 \
  --save_steps 10000 \
  --num_eval_samples 500 \
  --warmup_steps 5000 \
  --learning_rate 1e-4 \
  --auth_token True \
  --save_strategy steps \
  --use_bf16