#!/usr/bin/env bash
# Launch open_lm.main under torchrun with 8 processes per node, one for each
# of the GPUs exposed through CUDA_VISIBLE_DEVICES (indices 0-7).
#
# All options below are forwarded verbatim to open_lm.main. The run is named
# "open_lm_update_test_01", uses --dataset-type synthetic and reports to
# tensorboard.
#
# NOTE(review): --warmup is 2000 while --train-num-samples is 1000 with
# --epochs 2; presumably this is a short smoke-test run, but confirm that the
# warmup length is intended relative to the total amount of training.
#
# Each continuation backslash must be the very last character on its line;
# anything after it (even a space) ends the command early. The final line
# keeps its trailing backslash so that any options following in the file
# still attach to this command; at end of file it is harmless.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
torchrun --nproc-per-node 8 -m open_lm.main \
  --model open_lm_1b \
  --workers 1 \
  --dataset-resampled \
  --precision amp_bfloat16 \
  --grad-checkpointing \
  --log-every-n-steps 100 \
  --train-num-samples 1000 \
  --grad-clip-norm 1 \
  --data-key jsonl \
  --dataset-type synthetic \
  --lr 1e-5 \
  --fsdp --fsdp-amp \
  --warmup 2000 \
  --wd 0.1 \
  --beta2 0.95 \
  --epochs 2 \
  --report-to tensorboard \
  --name open_lm_update_test_01 \