update Read.me
Browse files
README.md
CHANGED
@@ -11,4 +11,31 @@ tags:
|
|
11 |
- HH-RLHF
|
12 |
- PPO
|
13 |
- lama-1.3B
|
14 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
- HH-RLHF
|
12 |
- PPO
|
13 |
- lama-1.3B
|
14 |
+
---
|
15 |
+
|
16 |
+
# RLHF with PPOTrainer and LoRA
|
17 |
+
|
18 |
+
![image/png](https://cdn-uploads.huggingface.co/production/uploads/64c0be34e175dd56a57151ca/piXU-OqDgrBKs7qR7fICw.png)
|
19 |
+
|
20 |
+
![image/png](https://cdn-uploads.huggingface.co/production/uploads/64c0be34e175dd56a57151ca/OXD2TqlQQY9NuC7JTiv_H.png)
|
21 |
+
|
22 |
+
# Hyperparameters
|
23 |
+
|
24 |
+
## PPO
|
25 |
+
learning_rate=5e-6,
|
26 |
+
batch_size=32,
|
27 |
+
mini_batch_size=1,
|
28 |
+
horizon=10000,
|
29 |
+
cliprange=0.2,
|
30 |
+
cliprange_value=0.2,
|
31 |
+
lam=0.95,
|
32 |
+
target_kl=2,
|
33 |
+
use_score_scaling=True,
|
34 |
+
log_with='wandb'
|
35 |
+
|
36 |
+
## LoRA
|
37 |
+
r=16,
|
38 |
+
lora_alpha=32,
|
39 |
+
lora_dropout=0.05,
|
40 |
+
bias="none",
|
41 |
+
task_type="CAUSAL_LM",
|