sparse / ms-swift /train /test_num_proc.sh
Enxin's picture
Upload folder using huggingface_hub
96fe658 verified
#!/bin/bash
# Benchmark how the dataset_num_proc setting affects preprocessing speed.
#
# Runs the same `swift sft` job once per dataset_num_proc value under the
# shell's `time` keyword. Each run writes to its own output_test_<n>
# directory so that runs do not overwrite each other.
#
# Usage:
#   test_num_proc.sh [NUM_PROC ...]
#     NUM_PROC  positive integers to benchmark; defaults to: 1 2 4
#
# Environment:
#   NUM_PROC_TEST_DATASET  value passed to --dataset; defaults to the
#                          LLaVA-Video-117K#100 path below.
#
# Exit status: 0 if every run succeeded, 1 if any run failed, 2 on an
# invalid NUM_PROC argument, 127 if `swift` is not available.
#
# `set -e` is deliberately not enabled: a failing configuration must not
# prevent the remaining configurations from being measured.
set -u

readonly DEFAULT_DATASET='/home/ubuntu/ext-mamba-illinois/wenhao-project/enxin/FullAttn/ms-swift/datasets/jsonl/LLaVA-Video-117K#100'
readonly SFT_DATASET="${NUM_PROC_TEST_DATASET:-${DEFAULT_DATASET}}"

#######################################
# Run one timed `swift sft` job for a given dataset_num_proc value.
# Globals:   SFT_DATASET (read)
# Arguments: $1 - value for --dataset_num_proc; also used as the suffix
#                 of the output directory (output_test_<$1>)
# Outputs:   whatever `swift` prints; timing report from `time` on stderr
# Returns:   exit status of `swift sft`
#######################################
run_sft() {
  local num_proc=$1
  # Only --output_dir and --dataset_num_proc vary between runs; every other
  # argument is shared so the timings stay comparable.
  local -a sft_args=(
    --model Qwen/Qwen2.5-7B-Instruct
    --train_type full
    --deepspeed zero3
    --attn_impl flash_attn
    --sequence_parallel_size 4
    --freeze_vit false
    --freeze_aligner false
    --freeze_llm false
    --use_hf true
    --dataset "${SFT_DATASET}"
    --split_dataset_ratio 0.001
    --torch_dtype bfloat16
    --num_train_epochs 1
    --per_device_train_batch_size 1
    --per_device_eval_batch_size 1
    --learning_rate 5e-6
    --gradient_accumulation_steps 16
    --eval_steps 200
    --save_steps 200
    --save_total_limit 2
    --logging_steps 5
    --max_length 65536
    --output_dir "output_test_${num_proc}"
    --warmup_ratio 0.05
    --dataloader_num_workers 1
    --dataset_num_proc "${num_proc}"
    --gradient_checkpointing true
    --max_grad_norm 1.0
    --use_liger_kernel true
    --loss_scale default
  )
  time swift sft "${sft_args[@]}"
}

#######################################
# Benchmark every requested dataset_num_proc value in order.
# Arguments: optional list of positive integers; defaults to 1 2 4
# Outputs:   progress banners on stdout, diagnostics on stderr
# Returns:   0 if all runs succeeded, 1 if any failed, 2 on a bad
#            argument, 127 if `swift` cannot be found
#######################################
main() {
  local -a num_proc_values
  local num_proc
  local failed=0

  if (( $# > 0 )); then
    num_proc_values=("$@")
  else
    num_proc_values=(1 2 4)
  fi

  # Reject bad values up front so no long-running job starts before the
  # whole request is known to be valid.
  for num_proc in "${num_proc_values[@]}"; do
    if [[ ! "${num_proc}" =~ ^[1-9][0-9]*$ ]]; then
      printf 'error: NUM_PROC must be a positive integer, got: %s\n' \
        "${num_proc}" >&2
      return 2
    fi
  done

  if ! command -v swift >/dev/null 2>&1; then
    printf 'error: swift command not found on PATH\n' >&2
    return 127
  fi

  echo "测试 dataset_num_proc 参数对预处理速度的影响"
  for num_proc in "${num_proc_values[@]}"; do
    echo "=== 测试 dataset_num_proc=${num_proc} ==="
    if ! run_sft "${num_proc}"; then
      # Keep going: the remaining configurations are still worth timing.
      printf 'warning: swift sft failed for dataset_num_proc=%s\n' \
        "${num_proc}" >&2
      failed=1
    fi
  done
  echo "测试完成!"

  return "${failed}"
}

main "$@"