olm-chat-7b / open_lm /train_alpaca.sh
henhenhahi111112's picture
Upload folder using huggingface_hub
af6e330 verified
raw
history blame
644 Bytes
#!/usr/bin/env bash
#
# Launch the open_lm training entry point (`python -m open_lm.main`) through
# torchrun, one worker process per visible GPU, for the run named
# "open_lm_alpaca" using JSONL train/validation files.
#
# Usage:
#   bash train_alpaca.sh
#
# Optional environment overrides (the defaults reproduce the values that were
# previously hard-coded in this script):
#   OPEN_LM_GPUS      Comma-separated GPU ids exported as CUDA_VISIBLE_DEVICES.
#                     Default: 0,1,2,3,4,5,6,7
#   OPEN_LM_DATA_DIR  Directory containing train_data.jsonl and val_data.jsonl.
#
# Exit status: that of torchrun, or 1 if a pre-flight check fails.
set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

readonly gpu_list="${OPEN_LM_GPUS:-0,1,2,3,4,5,6,7}"
readonly data_dir="${OPEN_LM_DATA_DIR:-/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm}"

export CUDA_VISIBLE_DEVICES="${gpu_list}"

# Derive the worker count from the GPU list so the two can never disagree
# (the count used to be a separate literal `8`).
IFS=',' read -r -a gpu_ids <<< "${gpu_list}"
readonly num_gpus="${#gpu_ids[@]}"
(( num_gpus > 0 )) || die "no GPU ids found in '${gpu_list}'"

command -v torchrun > /dev/null || die "torchrun not found on PATH"

# Arguments live in an array instead of backslash continuations: a stray or
# missing trailing `\` can no longer change which flags reach the trainer.
train_args=(
  # Model and data.
  # NOTE(review): the hosting repo is named "olm-chat-7b" but this selects
  # open_lm_1b -- confirm this is the intended model config.
  --model open_lm_1b
  --train-data "${data_dir}/train_data.jsonl"
  --val-data "${data_dir}/val_data.jsonl"
  --workers 1
  --dataset-resampled
  --data-key jsonl
  --val-data-key jsonl
  --dataset-type jsonl

  # Precision and FSDP flags.
  --precision amp_bfloat16
  --grad-checkpointing
  --fsdp
  --fsdp-amp

  # Optimizer settings and schedule.
  --lr 1e-5
  --warmup 400
  --wd 0.1
  --beta2 0.95
  --grad-clip-norm 1
  --epochs 5

  # Logging and run identity.
  --log-every-n-steps 20
  --report-to tensorboard
  --name open_lm_alpaca
)

torchrun --nproc-per-node "${num_gpus}" -m open_lm.main "${train_args[@]}"