# Source: sparse/ms-swift/examples/train/grpo/plugin/run_external_reward_model.sh
# Hosting-page header (not part of the script): uploader "Enxin";
# commit message "Upload folder using huggingface_hub"; revision 96fe658 (verified).
# GRPO run of `swift rlhf` that combines the `format` reward function with two
# reward models, each paired with a reward-model plugin.
#
# A reference reward-model plugin implementation is in swift/plugin/rm_plugin.py.
# Custom plugins must be registered in the file passed to --external_plugins.
#
# NOTE(review): --reward_weights carries three values for one reward function
# plus two reward models; presumably one weight per reward source in that
# order. Confirm against the ms-swift documentation.
# NOTE(review): the --external_plugins path is relative, so the script
# presumably has to be launched from the repository root. Confirm.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 NPROC_PER_NODE=8 \
swift rlhf \
  --rlhf_type grpo \
  --model Qwen/Qwen2.5-7B \
  --dataset 'AI-MO/NuminaMath-TIR#5000' \
  --use_vllm true \
  --vllm_mode colocate \
  --vllm_gpu_memory_utilization 0.5 \
  --external_plugins examples/train/grpo/plugin/plugin.py \
  --reward_funcs format \
  --reward_model Qwen/Qwen2.5-3B-Instruct Shanghai_AI_Laboratory/internlm2-7b-reward \
  --reward_model_plugin genrm my_rmplugin \
  --reward_weights 0.1 1 1 \
  --sleep_level 1 \
  --offload_model true \
  --offload_optimizer true \
  --log_completions true \
  --deepspeed zero2