# Launch the open_lm.main module under torchrun with 8 worker processes on
# this node, using the jsonl train/val files below, and report to tensorboard
# under the run name "open_lm_alpaca".
#
# CUDA_VISIBLE_DEVICES exposes GPU indices 0-7 to the launched processes.
# NOTE(review): --nproc-per-node (8) presumably should match the number of
# device indices listed here — keep the two in sync when changing either.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Directory holding the training/validation jsonl files (shared by both flags).
data_dir=/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm

# Each continuation backslash must be the LAST character on its line; any
# trailing text after it (even a space) ends the command early and turns the
# following flag lines into separate commands.
torchrun --nproc-per-node 8 -m open_lm.main \
  --model open_lm_1b \
  --train-data "${data_dir}/train_data.jsonl" \
  --val-data "${data_dir}/val_data.jsonl" \
  --workers 1 \
  --dataset-resampled \
  --precision amp_bfloat16 \
  --grad-checkpointing \
  --log-every-n-steps 20 \
  --grad-clip-norm 1 \
  --data-key jsonl \
  --val-data-key jsonl \
  --dataset-type jsonl \
  --lr 1e-5 \
  --fsdp --fsdp-amp \
  --warmup 400 \
  --wd 0.1 \
  --beta2 0.95 \
  --epochs 5 \
  --report-to tensorboard \
  --name open_lm_alpaca