#!/bin/bash
#
# Benchmark how the `--dataset_num_proc` value passed to `swift sft` affects
# run time. The same fine-tuning command is launched once per worker count,
# wrapped in `time`; every other argument is held constant between runs.
#
# Usage: bash <this-script> [NUM_PROC ...]      (default worker counts: 1 2 4)
# Env:   BENCH_DATASET  optional override for the value passed to --dataset.
#
# NOTE: `time` covers the entire `swift sft` invocation (model loading and
# training included), not only the dataset preprocessing stage.
# NOTE(review): the model id looks like a text-only checkpoint, while
# --freeze_vit/--freeze_aligner and the video dataset suggest a multimodal
# model may have been intended -- confirm before relying on the numbers.

# Deliberately no `set -e`: one failed run must not prevent the remaining
# configurations from being measured.
set -uo pipefail

# Dataset spec used when BENCH_DATASET is not set.
readonly DEFAULT_DATASET='/home/ubuntu/ext-mamba-illinois/wenhao-project/enxin/FullAttn/ms-swift/datasets/jsonl/LLaVA-Video-117K#100'

#######################################
# Run one timed `swift sft` job with the given preprocessing worker count.
# Globals:   DEFAULT_DATASET (read), BENCH_DATASET (read, optional)
# Arguments: $1 - value for --dataset_num_proc; also used as the suffix of
#                 the output directory (output_test_<N>)
# Outputs:   a banner line on stdout, followed by whatever `swift` prints;
#            the timing report from `time` goes to stderr
# Returns:   the exit status of `swift sft`
#######################################
run_benchmark() {
  local -r num_proc=${1:?run_benchmark: missing dataset_num_proc value}

  # Arguments live in an array so each value stays a single word and no
  # fragile backslash line continuations are needed.
  local -a sft_args=(
    --model Qwen/Qwen2.5-7B-Instruct
    --train_type full
    --deepspeed zero3
    --attn_impl flash_attn
    --sequence_parallel_size 4
    --freeze_vit false
    --freeze_aligner false
    --freeze_llm false
    --use_hf true
    --dataset "${BENCH_DATASET:-$DEFAULT_DATASET}"
    --split_dataset_ratio 0.001
    --torch_dtype bfloat16
    --num_train_epochs 1
    --per_device_train_batch_size 1
    --per_device_eval_batch_size 1
    --learning_rate 5e-6
    --gradient_accumulation_steps 16
    --eval_steps 200
    --save_steps 200
    --save_total_limit 2
    --logging_steps 5
    --max_length 65536
    --output_dir "output_test_${num_proc}"
    --warmup_ratio 0.05
    --dataloader_num_workers 1
    --dataset_num_proc "${num_proc}"
    --gradient_checkpointing true
    --max_grad_norm 1.0
    --use_liger_kernel true
    --loss_scale default
  )

  echo "=== 测试 dataset_num_proc=${num_proc} ==="
  time swift sft "${sft_args[@]}"
}

#######################################
# Run the benchmark for every requested worker count.
# Arguments: optional list of worker counts; defaults to 1 2 4
# Outputs:   progress banners on stdout, failure warnings on stderr
#######################################
main() {
  local -a proc_counts=(1 2 4)
  if (( $# > 0 )); then
    proc_counts=("$@")
  fi

  echo "测试 dataset_num_proc 参数对预处理速度的影响"

  local n
  for n in "${proc_counts[@]}"; do
    # Keep going after a failure so the other configurations still run.
    run_benchmark "$n" \
      || printf 'warning: run with dataset_num_proc=%s exited with a non-zero status\n' "$n" >&2
  done

  echo "测试完成!"
}

main "$@"