CUDA_VISIBLE_DEVICES=0 \
swift sft \
    --model "Qwen/Qwen2.5-0.5B-Instruct" \
    --train_type "lora" \
    --dataset "AI-ModelScope/alpaca-gpt4-data-zh#100" \
    --torch_dtype "bfloat16" \
    --num_train_epochs "1" \
    --per_device_train_batch_size "1" \
    --learning_rate "1e-4" \
    --lora_rank "8" \
    --lora_alpha "32" \
    --target_modules "all-linear" \
    --gradient_accumulation_steps "16" \
    --save_steps "50" \
    --save_total_limit "5" \
    --logging_steps "5" \
    --max_length "2048" \
    --eval_strategy "steps" \
    --eval_steps "5" \
    --per_device_eval_batch_size "5" \
    --eval_use_evalscope \
    --eval_dataset "gsm8k" \
    --eval_dataset_args '{"gsm8k": {"few_shot_num": 0}}' \
    --eval_limit "10"
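
For reference, the same run can be driven from Python instead of the shell. The sketch below assumes ms-swift's `sft_main`/`TrainArguments` entrypoints in `swift.llm` (as exposed in ms-swift 3.x), with each keyword argument mirroring the CLI flag of the same name; the `last_model_checkpoint` result key is likewise an assumption about that API.

```python
import os

from swift.llm import TrainArguments, sft_main

os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # same single-GPU selection as the CLI line

# Assumed API: TrainArguments fields map 1:1 to the `swift sft` flags above.
train_args = TrainArguments(
    model='Qwen/Qwen2.5-0.5B-Instruct',
    train_type='lora',
    dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100'],  # '#100' subsamples 100 rows
    torch_dtype='bfloat16',
    num_train_epochs=1,
    per_device_train_batch_size=1,
    learning_rate=1e-4,
    lora_rank=8,
    lora_alpha=32,
    target_modules=['all-linear'],
    gradient_accumulation_steps=16,  # effective batch size: 1 x 16 = 16
    save_steps=50,
    save_total_limit=5,
    logging_steps=5,
    max_length=2048,
    eval_strategy='steps',
    eval_steps=5,
    per_device_eval_batch_size=5,
    eval_use_evalscope=True,         # run EvalScope benchmarks during training
    eval_dataset=['gsm8k'],
    eval_dataset_args={'gsm8k': {'few_shot_num': 0}},  # zero-shot GSM8K
    eval_limit=10,                   # evaluate on only 10 samples
)

result = sft_main(train_args)
print('last checkpoint:', result['last_model_checkpoint'])  # assumed result key
```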