#!/usr/bin/env bash
#
# Launches an open_lm.main run named "open_lm_alpaca" through torchrun, on the
# GPUs listed in CUDA_VISIBLE_DEVICES.
#
# Usage: bash <this script>
#
# Every flag in train_args is forwarded unchanged to open_lm.main; consult that
# module's argument parser for the authoritative meaning of each option.
set -euo pipefail

export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Start one process per listed GPU. The count is derived from the list above so
# the two values cannot drift apart when the GPU list is edited.
IFS=',' read -r -a gpu_ids <<< "${CUDA_VISIBLE_DEVICES}"
readonly nproc_per_node="${#gpu_ids[@]}"

# Directory holding the train/validation JSONL files.
readonly data_dir=/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm
readonly train_data="${data_dir}/train_data.jsonl"
readonly val_data="${data_dir}/val_data.jsonl"

# Fail before the distributed launch rather than inside every worker process.
if ! command -v torchrun >/dev/null; then
  printf 'error: torchrun not found on PATH\n' >&2
  exit 1
fi
for data_file in "${train_data}" "${val_data}"; do
  if [[ ! -f "${data_file}" ]]; then
    printf 'error: data file not found: %s\n' "${data_file}" >&2
    exit 1
  fi
done

# Arguments are kept in an array (instead of backslash continuations) so that a
# stray trailing "\" can never splice a following line into the command.
train_args=(
  --model open_lm_1b
  --train-data "${train_data}"
  --val-data "${val_data}"
  --workers 1
  --dataset-resampled
  --precision amp_bfloat16
  --grad-checkpointing
  --log-every-n-steps 20
  --grad-clip-norm 1
  --data-key jsonl
  --val-data-key jsonl
  --dataset-type jsonl
  --lr 1e-5
  --fsdp --fsdp-amp
  --warmup 400
  --wd 0.1
  --beta2 0.95
  --epochs 5
  --report-to tensorboard
  --name open_lm_alpaca
)

torchrun --nproc-per-node "${nproc_per_node}" -m open_lm.main "${train_args[@]}"