# Page-header residue (not part of the script): BechusRantus - "Upload folder using huggingface_hub" - commit 7134ce7 (verified)
# Launches a `swift sft` training run for Qwen/Qwen2.5-3B-Instruct with
# `--tuner_type lora` on the AI-ModelScope/LongAlpaca-12k dataset, using
# `--max_length 65536` and `--sequence_parallel_size 8`.
#
# Setup and measurements recorded for this configuration:
#   Env:        8 * A100
#   Max Length: 65536
#   GPU Memory: 8 * 40GiB, Training Speed 26s/it
#
# NPROC_PER_NODE and CELOSS_PARALLEL_SIZE are written as prefix assignments,
# so they are placed in the environment of this single `swift` command rather
# than being set for the rest of the shell session.
# NOTE(review): NPROC_PER_NODE=8 presumably corresponds to the 8 GPUs listed
# above, and CELOSS_PARALLEL_SIZE looks like a chunk size for the loss
# computation - confirm both against the swift CLI documentation.
#
# NOTE(review): every line below is joined into one command by the trailing
# backslashes, so no comment line can be inserted between the options (a `#`
# would swallow the rest of the joined line and end the command there).
# The last visible option line also ends with `\`; if no further options
# follow it, drop that backslash so a line added later is not absorbed into
# this command as extra arguments.
NPROC_PER_NODE=8 \
CELOSS_PARALLEL_SIZE=2048 \
swift sft \
--model Qwen/Qwen2.5-3B-Instruct \
--dataset 'AI-ModelScope/LongAlpaca-12k' \
--load_from_cache_file true \
--tuner_type lora \
--torch_dtype bfloat16 \
--per_device_train_batch_size 4 \
--target_modules all-linear \
--gradient_accumulation_steps 8 \
--save_total_limit 2 \
--save_only_model true \
--save_steps 50 \
--max_length 65536 \
--warmup_ratio 0.05 \
--attn_impl flash_attn \
--sequence_parallel_size 8 \
--logging_steps 1 \
--use_logits_to_keep false \
--padding_free true \