Create README.md

Trained with ppo_trainer

Parameters:
adap_kl_ctrl :true
backward_batch_size :1
batch_size :32
cliprange :0.2
cliprange_value :0.2
compare_steps :1
early_stopping :false
exp_name :"example_1_3b"
forward_batch_size :null
gamma :1
global_backward_batch_size :1
global_batch_size :32
gradient_accumulation_steps :1
horizon :10,000
init_kl_coef :0.2
is_encoder_decoder :false
is_peft_model :true
kl_penalty :"kl"
lam :0.95
learning_rate :0.000005
log_with :"wandb"
max_grad_norm :null
mini_batch_size :1
model_name :null
optimize_cuda_cache :null
optimize_device_cache :false
ppo_epochs :4
query_dataset :null
ratio_threshold :10
remove_unused_columns :true
reward_model :null
score_clip :null
seed :0
steps :20,000
target :6
target_kl :2
task_name :null
total_ppo_epochs :3
tracker_project_name :"trl"
use_score_norm :false
use_score_scaling :true
vf_coef :0.1
whiten_rewards :false
world_size :1

![W&B Chart 1_13_2024, 7_31_37 PM.png](https://cdn-uploads.huggingface.co/production/uploads/64c0be34e175dd56a57151ca/IFNEMnDS0B0pSCSQGpHcJ.png)

![regplot-0.png](https://cdn-uploads.huggingface.co/production/uploads/64c0be34e175dd56a57151ca/u1ebamFLrhvMRnY1pDF6P.png)

Files changed (1) hide show

README.md +14 -0

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+license: apache-2.0
+datasets:
+- HuggingFaceH4/ultrafeedback_binarized
+language:
+- en
+library_name: transformers
+pipeline_tag: question-answering
+tags:
+- human feedback
+- HH-RLHF
+- PPO
+- llama-1.3B
+---