# Launch the open_lm `open_lm_1b` run named "open_lm_update_test_01" via torchrun.
#
# Usage: run this script from an environment where `torchrun` and the
# `open_lm` package are available.
#
# The export must be its own statement: if the torchrun command line follows
# it on the same line, the shell hands every word to `export` as a variable
# name and torchrun is never started.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# --nproc-per-node is kept equal to the number of device ids exported above.
# Each continuation backslash must be the very last character on its line;
# a backslash followed by a space escapes the space instead of continuing
# the command, which glues a leading blank onto the next flag.
# NOTE(review): flag values are carried over unchanged from the original
# command; confirm --train-num-samples / --warmup are what this run intends.
torchrun --nproc-per-node 8 -m open_lm.main \
  --model open_lm_1b \
  --workers 1 \
  --dataset-resampled \
  --precision amp_bfloat16 \
  --grad-checkpointing \
  --log-every-n-steps 100 \
  --train-num-samples 1000 \
  --grad-clip-norm 1 \
  --data-key jsonl \
  --dataset-type synthetic \
  --lr 1e-5 \
  --fsdp --fsdp-amp \
  --warmup 2000 \
  --wd 0.1 \
  --beta2 0.95 \
  --epochs 2 \
  --report-to tensorboard \
  --name open_lm_update_test_01