File size: 482 Bytes
af6e330
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Launch the open_lm training entry point (`python -m open_lm.main` style,
# via torchrun) for the `open_lm_1b` model under the run name
# `open_lm_update_test_01`.
#
# What the flags below request, as written:
#   - 8 torchrun worker processes per node (--nproc-per-node 8)
#   - synthetic dataset type, 1000 training samples, 2 epochs
#   - bfloat16 AMP precision, gradient checkpointing, FSDP with --fsdp-amp
#   - lr 1e-5, weight decay 0.1, beta2 0.95, grad-clip norm 1
#   - logging every 100 steps, reporting to tensorboard
#
# NOTE(review): --warmup 2000 looks larger than the step budget implied by
# --train-num-samples 1000 with --epochs 2 — confirm this is intended for
# this test run.

# Expose GPU ids 0-7 to the job. Keep the number of ids listed here equal to
# the --nproc-per-node value passed to torchrun below.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Every argument line except the last ends with a backslash continuation.
# The last line must NOT end with one: a dangling continuation would make the
# shell append whatever line follows to this command's argument list.
torchrun --nproc-per-node 8 -m open_lm.main \
  --model open_lm_1b \
  --workers 1 \
  --dataset-resampled \
  --precision amp_bfloat16 \
  --grad-checkpointing \
  --log-every-n-steps 100 \
  --train-num-samples 1000 \
  --grad-clip-norm 1 \
  --data-key jsonl \
  --dataset-type synthetic \
  --lr 1e-5 \
  --fsdp --fsdp-amp \
  --warmup 2000 \
  --wd 0.1 \
  --beta2 0.95 \
  --epochs 2 \
  --report-to tensorboard \
  --name open_lm_update_test_01