# ZeRO3: 91.2s/it; 16 * 80GiB
# Megatron-LM: 9.6s/it; 16 * 60GiB
# Launch using Alibaba Cloud DLC
# ref: https://github.com/modelscope/ms-swift/blob/main/examples/train/multi-node/dlc/train.sh
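#
# On DLC, every node runs this same script; the platform injects WORLD_SIZE
# (number of nodes), RANK (this node's rank), MASTER_ADDR and MASTER_PORT,
# which the two assignments below map onto ms-swift's NNODES/NODE_RANK.
# A minimal sketch for launching outside DLC (values are illustrative;
# set RANK=1 on the second node and adjust the address to your cluster):
#   export WORLD_SIZE=2 RANK=0 MASTER_ADDR=<node0-ip> MASTER_PORT=29500
#   export NPROC_PER_NODE=8
# The job uses 16 GPUs in total (2 nodes * 8 GPUs), matching the
# "16 * 80GiB" figure above.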
NNODES=$WORLD_SIZE \
NODE_RANK=$RANK \
megatron sft \
    --load Qwen3-30B-A3B-Base-mcore \
    --dataset 'liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT' \
    --tensor_model_parallel_size 2 \
    --expert_model_parallel_size 8 \
    --moe_grouped_gemm true \
    --moe_shared_expert_overlap true \
    --moe_aux_loss_coeff 0.01 \
    --micro_batch_size 1 \
    --global_batch_size 16 \
    --packing true \
    --recompute_granularity full \
    --recompute_method uniform \
    --recompute_num_layers 1 \
    --train_iters 2000 \
    --eval_iters 50 \
    --finetune true \
    --cross_entropy_loss_fusion true \
    --lr 1e-5 \
    --lr_warmup_iters 100 \
    --min_lr 1e-6 \
    --save megatron_output/Qwen3-30B-A3B-Base \
    --eval_interval 200 \
    --save_interval 200 \
    --max_length 8192 \
    --num_workers 8 \
    --dataset_num_proc 8 \
    --no_save_optim true \
    --no_save_rng true \
    --sequence_parallel true \
    --use_flash_attn true
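
# The checkpoint passed to --load is an HF checkpoint pre-converted to
# Megatron (mcore) format. A minimal sketch of the two conversions, assuming
# the `swift export` interface from the ms-swift Megatron-SWIFT docs (the GPU
# list and the vx-xxx version directory are illustrative placeholders):
#
# HF -> mcore, once before training:
#   CUDA_VISIBLE_DEVICES=0,1,2,3 \
#   swift export \
#       --model Qwen/Qwen3-30B-A3B-Base \
#       --to_mcore true \
#       --torch_dtype bfloat16 \
#       --output_dir Qwen3-30B-A3B-Base-mcore
#
# mcore -> HF, after training, for inference or deployment:
#   CUDA_VISIBLE_DEVICES=0,1,2,3 \
#   swift export \
#       --mcore_model megatron_output/Qwen3-30B-A3B-Base/vx-xxx \
#       --to_hf true \
#       --torch_dtype bfloat16 \
#       --output_dir megatron_output/Qwen3-30B-A3B-Base/vx-xxx-hf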