Create README.md
Browse files
Train with ppo_trainer
parameter:
adap_kl_ctrl :true
backward_batch_size :1
batch_size :32
cliprange :0.2
cliprange_value :0.2
compare_steps :1
early_stopping :false
exp_name :"example_1_3b"
forward_batch_size :null
gamma :1
global_backward_batch_size :1
global_batch_size :32
gradient_accumulation_steps :1
horizon :10,000
init_kl_coef :0.2
is_encoder_decoder :false
is_peft_model :true
kl_penalty :"kl"
lam :0.95
learning_rate :0.000005
log_with :"wandb"
max_grad_norm :null
mini_batch_size :1
model_name :null
optimize_cuda_cache :null
optimize_device_cache :false
ppo_epochs :4
query_dataset :null
ratio_threshold :10
remove_unused_columns :true
reward_model :null
score_clip :null
seed :0
steps :20,000
target :6
target_kl :2
task_name :null
total_ppo_epochs :3
tracker_project_name :"trl"
use_score_norm :false
use_score_scaling :true
vf_coef :0.1
whiten_rewards :false
world_size :1
![W&B Chart 1_13_2024, 7_31_37 PM.png](https://cdn-uploads.huggingface.co/production/uploads/64c0be34e175dd56a57151ca/IFNEMnDS0B0pSCSQGpHcJ.png)
![regplot-0.png](https://cdn-uploads.huggingface.co/production/uploads/64c0be34e175dd56a57151ca/u1ebamFLrhvMRnY1pDF6P.png)
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
datasets:
|
4 |
+
- HuggingFaceH4/ultrafeedback_binarized
|
5 |
+
language:
|
6 |
+
- en
|
7 |
+
library_name: transformers
|
8 |
+
pipeline_tag: question-answering
|
9 |
+
tags:
|
10 |
+
- human feedback
|
11 |
+
- HH-RLHF
|
12 |
+
- PPO
|
13 |
+
- llama-1.3B
|
14 |
+
---
|