Laurie committed on
Commit
4dad3de
1 Parent(s): ce80594

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -1
README.md CHANGED
@@ -9,4 +9,20 @@
9
 
10
  # 终端输入
11
  CUDA_VISIBLE_DEVICES=0 python src/infer.py \
12
- --checkpoint_dir path_to_checkpoint # repo files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # 终端输入
11
  CUDA_VISIBLE_DEVICES=0 python src/infer.py \
12
+ --checkpoint_dir path_to_checkpoint # repo files
13
+
14
+ # PPO 训练:先创建文件夹 path_to_rm_checkpoint,将此 repo 的文件存入其中,然后运行下列命令(单张 RTX 3090 上预计耗时约 50 小时)
15
+ CUDA_VISIBLE_DEVICES=0 python src/train_ppo.py \
16
+ --do_train \
17
+ --dataset alpaca_gpt4_en \
18
+ --finetuning_type lora \
19
+ --reward_model path_to_rm_checkpoint \
20
+ --output_dir path_to_ppo_checkpoint \
21
+ --per_device_train_batch_size 4 \
22
+ --gradient_accumulation_steps 4 \
23
+ --lr_scheduler_type cosine \
24
+ --logging_steps 10 \
25
+ --save_steps 1000 \
26
+ --learning_rate 5e-5 \
27
+ --num_train_epochs 1.0 \
28
+ --fp16