File size: 482 Bytes
af6e330 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
# Launch an open_lm training run (model open_lm_1b, run name
# open_lm_update_test_01) through torchrun, reporting to tensorboard.
#
# CUDA_VISIBLE_DEVICES lists eight device ids and --nproc-per-node is 8;
# keep the two in sync when changing the GPU count.
#
# NOTE(review): --warmup (2000) is large next to --train-num-samples (1000)
# and --epochs (2) -- presumably acceptable for a quick update test; confirm.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# The final argument line must NOT end in a backslash: a trailing
# continuation splices whatever follows the command into its argument list.
torchrun --nproc-per-node 8 -m open_lm.main \
  --model open_lm_1b \
  --workers 1 \
  --dataset-resampled \
  --precision amp_bfloat16 \
  --grad-checkpointing \
  --log-every-n-steps 100 \
  --train-num-samples 1000 \
  --grad-clip-norm 1 \
  --data-key jsonl \
  --dataset-type synthetic \
  --lr 1e-5 \
  --fsdp --fsdp-amp \
  --warmup 2000 \
  --wd 0.1 \
  --beta2 0.95 \
  --epochs 2 \
  --report-to tensorboard \
  --name open_lm_update_test_01