Add files using upload-large-folder tool

cb2428f verified 4 months ago

935 Bytes

	# 8 * 65GiB
	# (presumably the expected footprint: 8 GPUs at about 65GiB each -- TODO confirm)
	#
	# Runs `megatron sft` with 8 processes per node on CUDA devices 0-7, loading
	# the Qwen1.5-MoE-A2.7B-mcore checkpoint. The flag list is accumulated in the
	# positional parameters so that each group of flags can carry a comment; only
	# POSIX sh features are used, so the script does not depend on bash.

	# Begin with an empty list so arguments given to this script are not forwarded.
	set --

	# Checkpoint to load and dataset to train on.
	set -- "$@" \
		--load Qwen1.5-MoE-A2.7B-mcore \
		--dataset 'liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT'

	# Tensor / expert parallel sizes.
	set -- "$@" \
		--tensor_model_parallel_size 2 \
		--expert_model_parallel_size 4

	# MoE-specific switches.
	set -- "$@" \
		--moe_grouped_gemm true \
		--moe_shared_expert_overlap true \
		--moe_aux_loss_coeff 0.01

	# Batch sizes, packing and recomputation.
	set -- "$@" \
		--micro_batch_size 1 \
		--global_batch_size 16 \
		--packing true \
		--recompute_granularity selective

	# Iteration counts and fine-tuning mode.
	set -- "$@" \
		--train_iters 2000 \
		--eval_iters 50 \
		--finetune true \
		--cross_entropy_loss_fusion true

	# Learning-rate settings.
	set -- "$@" \
		--lr 1e-5 \
		--lr_warmup_iters 100 \
		--min_lr 1e-6

	# Output directory plus evaluation / checkpoint intervals.
	set -- "$@" \
		--save megatron_output/Qwen1.5-MoE-A2.7B \
		--eval_interval 200 \
		--save_interval 200

	# Sequence length limit and data-processing worker counts.
	set -- "$@" \
		--max_length 8192 \
		--num_workers 8 \
		--dataset_num_proc 8

	# Checkpoint-content switches.
	set -- "$@" \
		--no_save_optim true \
		--no_save_rng true

	# Sequence parallelism and flash attention.
	set -- "$@" \
		--sequence_parallel true \
		--use_flash_attn true

	# The environment assignments apply to this one command only; it is the last
	# command, so its exit status becomes the script's exit status.
	NPROC_PER_NODE=8 \
	CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
	megatron sft "$@"