Model save
Browse files- README.md +12 -17
- all_results.json +4 -4
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- train_results.json +4 -4
- trainer_state.json +163 -147
README.md
CHANGED
@@ -2,16 +2,10 @@
|
|
2 |
license: apache-2.0
|
3 |
base_model: alignment-handbook/zephyr-7b-sft-full
|
4 |
tags:
|
5 |
-
- alignment-handbook
|
6 |
-
- trl
|
7 |
-
- dpo
|
8 |
-
- generated_from_trainer
|
9 |
- trl
|
10 |
- dpo
|
11 |
- alignment-handbook
|
12 |
- generated_from_trainer
|
13 |
-
datasets:
|
14 |
-
- HuggingFaceH4/ultrafeedback_binarized
|
15 |
model-index:
|
16 |
- name: zephyr-7b-dpo-full-ultrabin-high-margin
|
17 |
results: []
|
@@ -22,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
|
|
22 |
|
23 |
# zephyr-7b-dpo-full-ultrabin-high-margin
|
24 |
|
25 |
-
This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on
|
26 |
It achieves the following results on the evaluation set:
|
27 |
-
- Loss: 0.
|
28 |
-
- Rewards/chosen: -0.
|
29 |
-
- Rewards/rejected: -1.
|
30 |
-
- Rewards/accuracies: 0.
|
31 |
-
- Rewards/margins: 0.
|
32 |
-
- Logps/rejected: -
|
33 |
-
- Logps/chosen: -
|
34 |
-
- Logits/rejected: 0.
|
35 |
-
- Logits/chosen: -0.
|
36 |
|
37 |
## Model description
|
38 |
|
@@ -69,7 +63,8 @@ The following hyperparameters were used during training:
|
|
69 |
|
70 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
71 |
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
72 |
-
| 0.
|
|
|
73 |
|
74 |
|
75 |
### Framework versions
|
|
|
2 |
license: apache-2.0
|
3 |
base_model: alignment-handbook/zephyr-7b-sft-full
|
4 |
tags:
|
|
|
|
|
|
|
|
|
5 |
- trl
|
6 |
- dpo
|
7 |
- alignment-handbook
|
8 |
- generated_from_trainer
|
|
|
|
|
9 |
model-index:
|
10 |
- name: zephyr-7b-dpo-full-ultrabin-high-margin
|
11 |
results: []
|
|
|
16 |
|
17 |
# zephyr-7b-dpo-full-ultrabin-high-margin
|
18 |
|
19 |
+
This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unknown dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.5598
|
22 |
+
- Rewards/chosen: -0.6746
|
23 |
+
- Rewards/rejected: -1.5654
|
24 |
+
- Rewards/accuracies: 0.75
|
25 |
+
- Rewards/margins: 0.8907
|
26 |
+
- Logps/rejected: -419.1961
|
27 |
+
- Logps/chosen: -330.0835
|
28 |
+
- Logits/rejected: 0.2134
|
29 |
+
- Logits/chosen: -0.2417
|
30 |
|
31 |
## Model description
|
32 |
|
|
|
63 |
|
64 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
65 |
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
66 |
+
| 0.4719 | 0.3484 | 50 | 0.5899 | -0.3895 | -0.8981 | 0.7070 | 0.5086 | -352.4708 | -301.5678 | -1.9397 | -1.9963 |
|
67 |
+
| 0.3224 | 0.6969 | 100 | 0.5598 | -0.6746 | -1.5654 | 0.75 | 0.8907 | -419.1961 | -330.0835 | 0.2134 | -0.2417 |
|
68 |
|
69 |
|
70 |
### Framework versions
|
all_results.json
CHANGED
@@ -14,9 +14,9 @@
|
|
14 |
"eval_samples_per_second": 19.581,
|
15 |
"eval_steps_per_second": 0.313,
|
16 |
"total_flos": 0.0,
|
17 |
-
"train_loss": 0.
|
18 |
-
"train_runtime":
|
19 |
"train_samples": 18339,
|
20 |
-
"train_samples_per_second": 5.
|
21 |
-
"train_steps_per_second": 0.
|
22 |
}
|
|
|
14 |
"eval_samples_per_second": 19.581,
|
15 |
"eval_steps_per_second": 0.313,
|
16 |
"total_flos": 0.0,
|
17 |
+
"train_loss": 0.42797933008287337,
|
18 |
+
"train_runtime": 3631.7108,
|
19 |
"train_samples": 18339,
|
20 |
+
"train_samples_per_second": 5.05,
|
21 |
+
"train_steps_per_second": 0.039
|
22 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943162336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2850ffba269c9beea7a27e656c559c5f876b38967c7a6b87d210c4d1b66e9185
|
3 |
size 4943162336
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f82482a5b53c718a34153ce2321f5c28b587d7da9ba733d4b9e3b74bff5feb2
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540516344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88875900874228283080cbb0b5f0f9444fc6aa58ee90c727879470f81eae1681
|
3 |
size 4540516344
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 0.9965156794425087,
|
3 |
"total_flos": 0.0,
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 18339,
|
7 |
-
"train_samples_per_second": 5.
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 0.9965156794425087,
|
3 |
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.42797933008287337,
|
5 |
+
"train_runtime": 3631.7108,
|
6 |
"train_samples": 18339,
|
7 |
+
"train_samples_per_second": 5.05,
|
8 |
+
"train_steps_per_second": 0.039
|
9 |
}
|
trainer_state.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
"epoch": 0.9965156794425087,
|
5 |
-
"eval_steps":
|
6 |
"global_step": 143,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
@@ -10,238 +10,254 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.06968641114982578,
|
13 |
-
"grad_norm": 8.
|
14 |
"learning_rate": 3.333333333333333e-07,
|
15 |
-
"logits/chosen": -2.
|
16 |
-
"logits/rejected": -2.
|
17 |
-
"logps/chosen": -224.
|
18 |
-
"logps/rejected": -205.
|
19 |
"loss": 0.6925,
|
20 |
-
"rewards/accuracies": 0.
|
21 |
-
"rewards/chosen": 0.
|
22 |
-
"rewards/margins": 0.
|
23 |
-
"rewards/rejected": -0.
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 0.13937282229965156,
|
28 |
-
"grad_norm": 8.
|
29 |
"learning_rate": 4.981198836496775e-07,
|
30 |
-
"logits/chosen": -2.
|
31 |
-
"logits/rejected": -2.
|
32 |
-
"logps/chosen": -231.
|
33 |
-
"logps/rejected": -203.
|
34 |
"loss": 0.6753,
|
35 |
-
"rewards/accuracies": 0.
|
36 |
-
"rewards/chosen": 0.
|
37 |
-
"rewards/margins": 0.
|
38 |
-
"rewards/rejected": -0.
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 0.20905923344947736,
|
43 |
-
"grad_norm": 10.
|
44 |
"learning_rate": 4.832481997086846e-07,
|
45 |
-
"logits/chosen": -2.
|
46 |
-
"logits/rejected": -2.
|
47 |
-
"logps/chosen": -225.
|
48 |
-
"logps/rejected": -253.
|
49 |
"loss": 0.6114,
|
50 |
"rewards/accuracies": 0.856249988079071,
|
51 |
-
"rewards/chosen": 0.
|
52 |
-
"rewards/margins": 0.
|
53 |
-
"rewards/rejected": -0.
|
54 |
"step": 30
|
55 |
},
|
56 |
{
|
57 |
"epoch": 0.2787456445993031,
|
58 |
-
"grad_norm": 19.
|
59 |
"learning_rate": 4.543962032878959e-07,
|
60 |
-
"logits/chosen": -2.
|
61 |
-
"logits/rejected": -2.
|
62 |
-
"logps/chosen": -241.
|
63 |
-
"logps/rejected": -256.
|
64 |
-
"loss": 0.
|
65 |
"rewards/accuracies": 0.8500000238418579,
|
66 |
-
"rewards/chosen": -0.
|
67 |
-
"rewards/margins": 0.
|
68 |
-
"rewards/rejected": -0.
|
69 |
"step": 40
|
70 |
},
|
71 |
{
|
72 |
"epoch": 0.34843205574912894,
|
73 |
-
"grad_norm":
|
74 |
"learning_rate": 4.1329321073844413e-07,
|
75 |
-
"logits/chosen": -2.
|
76 |
-
"logits/rejected": -2.
|
77 |
-
"logps/chosen": -
|
78 |
-
"logps/rejected": -
|
79 |
-
"loss": 0.
|
80 |
-
"rewards/accuracies": 0.
|
81 |
-
"rewards/chosen": -0.
|
82 |
-
"rewards/margins": 0.
|
83 |
-
"rewards/rejected": -0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
"step": 50
|
85 |
},
|
86 |
{
|
87 |
"epoch": 0.4181184668989547,
|
88 |
-
"grad_norm":
|
89 |
"learning_rate": 3.624028324136517e-07,
|
90 |
-
"logits/chosen": -1.
|
91 |
-
"logits/rejected": -
|
92 |
-
"logps/chosen": -
|
93 |
-
"logps/rejected": -362.
|
94 |
-
"loss": 0.
|
95 |
-
"rewards/accuracies": 0.
|
96 |
-
"rewards/chosen": -0.
|
97 |
-
"rewards/margins": 0.
|
98 |
-
"rewards/rejected": -1.
|
99 |
"step": 60
|
100 |
},
|
101 |
{
|
102 |
"epoch": 0.4878048780487805,
|
103 |
-
"grad_norm":
|
104 |
"learning_rate": 3.047753100392174e-07,
|
105 |
-
"logits/chosen": -0.
|
106 |
-
"logits/rejected": -0.
|
107 |
-
"logps/chosen": -
|
108 |
-
"logps/rejected": -
|
109 |
-
"loss": 0.
|
110 |
"rewards/accuracies": 0.862500011920929,
|
111 |
-
"rewards/chosen": -0.
|
112 |
-
"rewards/margins": 1.
|
113 |
-
"rewards/rejected": -1.
|
114 |
"step": 70
|
115 |
},
|
116 |
{
|
117 |
"epoch": 0.5574912891986062,
|
118 |
-
"grad_norm":
|
119 |
"learning_rate": 2.4386469286927194e-07,
|
120 |
-
"logits/chosen": -0.
|
121 |
-
"logits/rejected": 0.
|
122 |
-
"logps/chosen": -
|
123 |
-
"logps/rejected": -
|
124 |
-
"loss": 0.
|
125 |
-
"rewards/accuracies": 0.
|
126 |
-
"rewards/chosen": -0.
|
127 |
-
"rewards/margins": 1.
|
128 |
-
"rewards/rejected": -2.
|
129 |
"step": 80
|
130 |
},
|
131 |
{
|
132 |
"epoch": 0.627177700348432,
|
133 |
-
"grad_norm":
|
134 |
"learning_rate": 1.8332181063127542e-07,
|
135 |
-
"logits/chosen": -0.
|
136 |
-
"logits/rejected": 0.
|
137 |
-
"logps/chosen": -
|
138 |
-
"logps/rejected": -
|
139 |
"loss": 0.3319,
|
140 |
-
"rewards/accuracies": 0.
|
141 |
-
"rewards/chosen": -0.
|
142 |
-
"rewards/margins": 1.
|
143 |
-
"rewards/rejected": -2.
|
144 |
"step": 90
|
145 |
},
|
146 |
{
|
147 |
"epoch": 0.6968641114982579,
|
148 |
-
"grad_norm":
|
149 |
"learning_rate": 1.26775451942554e-07,
|
150 |
-
"logits/chosen": 0.
|
151 |
-
"logits/rejected":
|
152 |
-
"logps/chosen": -
|
153 |
-
"logps/rejected": -
|
154 |
-
"loss": 0.
|
155 |
-
"rewards/accuracies": 0.
|
156 |
-
"rewards/chosen": -0.
|
157 |
-
"rewards/margins": 1.
|
158 |
-
"rewards/rejected": -2.
|
159 |
"step": 100
|
160 |
},
|
161 |
{
|
162 |
"epoch": 0.6968641114982579,
|
163 |
-
"eval_logits/chosen": 0.
|
164 |
-
"eval_logits/rejected": 0.
|
165 |
-
"eval_logps/chosen": -
|
166 |
-
"eval_logps/rejected": -
|
167 |
-
"eval_loss": 0.
|
168 |
-
"eval_rewards/accuracies": 0.
|
169 |
-
"eval_rewards/chosen": -0.
|
170 |
-
"eval_rewards/margins": 0.
|
171 |
-
"eval_rewards/rejected": -1.
|
172 |
-
"eval_runtime":
|
173 |
-
"eval_samples_per_second": 19.
|
174 |
-
"eval_steps_per_second": 0.
|
175 |
"step": 100
|
176 |
},
|
177 |
{
|
178 |
"epoch": 0.7665505226480837,
|
179 |
-
"grad_norm":
|
180 |
"learning_rate": 7.761486381573326e-08,
|
181 |
-
"logits/chosen": 0.
|
182 |
-
"logits/rejected": 1.
|
183 |
-
"logps/chosen": -
|
184 |
-
"logps/rejected": -
|
185 |
-
"loss": 0.
|
186 |
"rewards/accuracies": 0.8687499761581421,
|
187 |
-
"rewards/chosen": -0.
|
188 |
-
"rewards/margins": 1.
|
189 |
-
"rewards/rejected": -2.
|
190 |
"step": 110
|
191 |
},
|
192 |
{
|
193 |
"epoch": 0.8362369337979094,
|
194 |
-
"grad_norm":
|
195 |
"learning_rate": 3.878660868757322e-08,
|
196 |
-
"logits/chosen": 0.
|
197 |
-
"logits/rejected": 1.
|
198 |
-
"logps/chosen": -
|
199 |
-
"logps/rejected": -
|
200 |
-
"loss": 0.
|
201 |
"rewards/accuracies": 0.8812500238418579,
|
202 |
-
"rewards/chosen": -0.
|
203 |
-
"rewards/margins": 1.
|
204 |
-
"rewards/rejected": -2.
|
205 |
"step": 120
|
206 |
},
|
207 |
{
|
208 |
"epoch": 0.9059233449477352,
|
209 |
-
"grad_norm":
|
210 |
"learning_rate": 1.261795485174083e-08,
|
211 |
-
"logits/chosen": 0.
|
212 |
-
"logits/rejected": 1.
|
213 |
-
"logps/chosen": -
|
214 |
-
"logps/rejected": -
|
215 |
-
"loss": 0.
|
216 |
"rewards/accuracies": 0.918749988079071,
|
217 |
-
"rewards/chosen": -0.
|
218 |
-
"rewards/margins": 1.
|
219 |
-
"rewards/rejected": -2.
|
220 |
"step": 130
|
221 |
},
|
222 |
{
|
223 |
"epoch": 0.975609756097561,
|
224 |
-
"grad_norm":
|
225 |
"learning_rate": 6.773858303274482e-10,
|
226 |
-
"logits/chosen": 0.
|
227 |
-
"logits/rejected": 1.
|
228 |
-
"logps/chosen": -
|
229 |
-
"logps/rejected": -
|
230 |
-
"loss": 0.
|
231 |
-
"rewards/accuracies": 0.
|
232 |
-
"rewards/chosen": -0.
|
233 |
-
"rewards/margins": 1.
|
234 |
-
"rewards/rejected": -2.
|
235 |
"step": 140
|
236 |
},
|
237 |
{
|
238 |
"epoch": 0.9965156794425087,
|
239 |
"step": 143,
|
240 |
"total_flos": 0.0,
|
241 |
-
"train_loss": 0.
|
242 |
-
"train_runtime":
|
243 |
-
"train_samples_per_second": 5.
|
244 |
-
"train_steps_per_second": 0.
|
245 |
}
|
246 |
],
|
247 |
"logging_steps": 10,
|
|
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
"epoch": 0.9965156794425087,
|
5 |
+
"eval_steps": 50,
|
6 |
"global_step": 143,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.06968641114982578,
|
13 |
+
"grad_norm": 8.879917985004713,
|
14 |
"learning_rate": 3.333333333333333e-07,
|
15 |
+
"logits/chosen": -2.5109400749206543,
|
16 |
+
"logits/rejected": -2.4602229595184326,
|
17 |
+
"logps/chosen": -224.4826202392578,
|
18 |
+
"logps/rejected": -205.3321075439453,
|
19 |
"loss": 0.6925,
|
20 |
+
"rewards/accuracies": 0.512499988079071,
|
21 |
+
"rewards/chosen": 0.0009716759668663144,
|
22 |
+
"rewards/margins": 0.001607558922842145,
|
23 |
+
"rewards/rejected": -0.0006358829559758306,
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 0.13937282229965156,
|
28 |
+
"grad_norm": 8.119644042984005,
|
29 |
"learning_rate": 4.981198836496775e-07,
|
30 |
+
"logits/chosen": -2.538778305053711,
|
31 |
+
"logits/rejected": -2.402923583984375,
|
32 |
+
"logps/chosen": -231.76394653320312,
|
33 |
+
"logps/rejected": -203.54684448242188,
|
34 |
"loss": 0.6753,
|
35 |
+
"rewards/accuracies": 0.7875000238418579,
|
36 |
+
"rewards/chosen": 0.008117455057799816,
|
37 |
+
"rewards/margins": 0.029356980696320534,
|
38 |
+
"rewards/rejected": -0.02123952843248844,
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 0.20905923344947736,
|
43 |
+
"grad_norm": 10.589687412392658,
|
44 |
"learning_rate": 4.832481997086846e-07,
|
45 |
+
"logits/chosen": -2.5253872871398926,
|
46 |
+
"logits/rejected": -2.430739402770996,
|
47 |
+
"logps/chosen": -225.355712890625,
|
48 |
+
"logps/rejected": -253.0909881591797,
|
49 |
"loss": 0.6114,
|
50 |
"rewards/accuracies": 0.856249988079071,
|
51 |
+
"rewards/chosen": 0.030127260833978653,
|
52 |
+
"rewards/margins": 0.18957777321338654,
|
53 |
+
"rewards/rejected": -0.15945051610469818,
|
54 |
"step": 30
|
55 |
},
|
56 |
{
|
57 |
"epoch": 0.2787456445993031,
|
58 |
+
"grad_norm": 19.230572297481732,
|
59 |
"learning_rate": 4.543962032878959e-07,
|
60 |
+
"logits/chosen": -2.4045207500457764,
|
61 |
+
"logits/rejected": -2.3013105392456055,
|
62 |
+
"logps/chosen": -241.72854614257812,
|
63 |
+
"logps/rejected": -256.7030944824219,
|
64 |
+
"loss": 0.5447,
|
65 |
"rewards/accuracies": 0.8500000238418579,
|
66 |
+
"rewards/chosen": -0.04323701187968254,
|
67 |
+
"rewards/margins": 0.447670042514801,
|
68 |
+
"rewards/rejected": -0.4909070134162903,
|
69 |
"step": 40
|
70 |
},
|
71 |
{
|
72 |
"epoch": 0.34843205574912894,
|
73 |
+
"grad_norm": 23.777968472263748,
|
74 |
"learning_rate": 4.1329321073844413e-07,
|
75 |
+
"logits/chosen": -2.2776694297790527,
|
76 |
+
"logits/rejected": -2.1192574501037598,
|
77 |
+
"logps/chosen": -255.7515411376953,
|
78 |
+
"logps/rejected": -307.02410888671875,
|
79 |
+
"loss": 0.4719,
|
80 |
+
"rewards/accuracies": 0.793749988079071,
|
81 |
+
"rewards/chosen": -0.31196385622024536,
|
82 |
+
"rewards/margins": 0.6551374197006226,
|
83 |
+
"rewards/rejected": -0.9671012163162231,
|
84 |
+
"step": 50
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"epoch": 0.34843205574912894,
|
88 |
+
"eval_logits/chosen": -1.996337890625,
|
89 |
+
"eval_logits/rejected": -1.9396870136260986,
|
90 |
+
"eval_logps/chosen": -301.5677795410156,
|
91 |
+
"eval_logps/rejected": -352.47076416015625,
|
92 |
+
"eval_loss": 0.5898596048355103,
|
93 |
+
"eval_rewards/accuracies": 0.70703125,
|
94 |
+
"eval_rewards/chosen": -0.3894880414009094,
|
95 |
+
"eval_rewards/margins": 0.5086125135421753,
|
96 |
+
"eval_rewards/rejected": -0.8981005549430847,
|
97 |
+
"eval_runtime": 102.5575,
|
98 |
+
"eval_samples_per_second": 19.501,
|
99 |
+
"eval_steps_per_second": 0.312,
|
100 |
"step": 50
|
101 |
},
|
102 |
{
|
103 |
"epoch": 0.4181184668989547,
|
104 |
+
"grad_norm": 33.164641300897365,
|
105 |
"learning_rate": 3.624028324136517e-07,
|
106 |
+
"logits/chosen": -1.2023751735687256,
|
107 |
+
"logits/rejected": -0.8880468606948853,
|
108 |
+
"logps/chosen": -297.7267150878906,
|
109 |
+
"logps/rejected": -362.5870056152344,
|
110 |
+
"loss": 0.4024,
|
111 |
+
"rewards/accuracies": 0.8500000238418579,
|
112 |
+
"rewards/chosen": -0.5735751986503601,
|
113 |
+
"rewards/margins": 0.9970341920852661,
|
114 |
+
"rewards/rejected": -1.570609211921692,
|
115 |
"step": 60
|
116 |
},
|
117 |
{
|
118 |
"epoch": 0.4878048780487805,
|
119 |
+
"grad_norm": 26.00066903060501,
|
120 |
"learning_rate": 3.047753100392174e-07,
|
121 |
+
"logits/chosen": -0.6797115802764893,
|
122 |
+
"logits/rejected": -0.30923840403556824,
|
123 |
+
"logps/chosen": -267.3857116699219,
|
124 |
+
"logps/rejected": -395.60101318359375,
|
125 |
+
"loss": 0.362,
|
126 |
"rewards/accuracies": 0.862500011920929,
|
127 |
+
"rewards/chosen": -0.5475583076477051,
|
128 |
+
"rewards/margins": 1.2465879917144775,
|
129 |
+
"rewards/rejected": -1.7941462993621826,
|
130 |
"step": 70
|
131 |
},
|
132 |
{
|
133 |
"epoch": 0.5574912891986062,
|
134 |
+
"grad_norm": 36.007825212896435,
|
135 |
"learning_rate": 2.4386469286927194e-07,
|
136 |
+
"logits/chosen": -0.32210594415664673,
|
137 |
+
"logits/rejected": 0.29763275384902954,
|
138 |
+
"logps/chosen": -286.4994812011719,
|
139 |
+
"logps/rejected": -484.10357666015625,
|
140 |
+
"loss": 0.3448,
|
141 |
+
"rewards/accuracies": 0.8812500238418579,
|
142 |
+
"rewards/chosen": -0.6800082921981812,
|
143 |
+
"rewards/margins": 1.5236244201660156,
|
144 |
+
"rewards/rejected": -2.2036328315734863,
|
145 |
"step": 80
|
146 |
},
|
147 |
{
|
148 |
"epoch": 0.627177700348432,
|
149 |
+
"grad_norm": 28.429320380976726,
|
150 |
"learning_rate": 1.8332181063127542e-07,
|
151 |
+
"logits/chosen": -0.29179516434669495,
|
152 |
+
"logits/rejected": 0.5656725168228149,
|
153 |
+
"logps/chosen": -315.7966613769531,
|
154 |
+
"logps/rejected": -458.8309631347656,
|
155 |
"loss": 0.3319,
|
156 |
+
"rewards/accuracies": 0.893750011920929,
|
157 |
+
"rewards/chosen": -0.6928261518478394,
|
158 |
+
"rewards/margins": 1.5519336462020874,
|
159 |
+
"rewards/rejected": -2.244760036468506,
|
160 |
"step": 90
|
161 |
},
|
162 |
{
|
163 |
"epoch": 0.6968641114982579,
|
164 |
+
"grad_norm": 29.97250123159769,
|
165 |
"learning_rate": 1.26775451942554e-07,
|
166 |
+
"logits/chosen": -0.13269878923892975,
|
167 |
+
"logits/rejected": 0.6727190017700195,
|
168 |
+
"logps/chosen": -300.4376220703125,
|
169 |
+
"logps/rejected": -451.68609619140625,
|
170 |
+
"loss": 0.3224,
|
171 |
+
"rewards/accuracies": 0.8687499761581421,
|
172 |
+
"rewards/chosen": -0.6514891982078552,
|
173 |
+
"rewards/margins": 1.551584243774414,
|
174 |
+
"rewards/rejected": -2.203073501586914,
|
175 |
"step": 100
|
176 |
},
|
177 |
{
|
178 |
"epoch": 0.6968641114982579,
|
179 |
+
"eval_logits/chosen": -0.2416563630104065,
|
180 |
+
"eval_logits/rejected": 0.21337364614009857,
|
181 |
+
"eval_logps/chosen": -330.0835266113281,
|
182 |
+
"eval_logps/rejected": -419.1961364746094,
|
183 |
+
"eval_loss": 0.5598118305206299,
|
184 |
+
"eval_rewards/accuracies": 0.75,
|
185 |
+
"eval_rewards/chosen": -0.6746450662612915,
|
186 |
+
"eval_rewards/margins": 0.8907086849212646,
|
187 |
+
"eval_rewards/rejected": -1.5653537511825562,
|
188 |
+
"eval_runtime": 100.9616,
|
189 |
+
"eval_samples_per_second": 19.81,
|
190 |
+
"eval_steps_per_second": 0.317,
|
191 |
"step": 100
|
192 |
},
|
193 |
{
|
194 |
"epoch": 0.7665505226480837,
|
195 |
+
"grad_norm": 26.411152004320307,
|
196 |
"learning_rate": 7.761486381573326e-08,
|
197 |
+
"logits/chosen": 0.1635294407606125,
|
198 |
+
"logits/rejected": 1.3954848051071167,
|
199 |
+
"logps/chosen": -325.93487548828125,
|
200 |
+
"logps/rejected": -446.07916259765625,
|
201 |
+
"loss": 0.3142,
|
202 |
"rewards/accuracies": 0.8687499761581421,
|
203 |
+
"rewards/chosen": -0.7772396206855774,
|
204 |
+
"rewards/margins": 1.6457939147949219,
|
205 |
+
"rewards/rejected": -2.4230334758758545,
|
206 |
"step": 110
|
207 |
},
|
208 |
{
|
209 |
"epoch": 0.8362369337979094,
|
210 |
+
"grad_norm": 32.75502406597345,
|
211 |
"learning_rate": 3.878660868757322e-08,
|
212 |
+
"logits/chosen": 0.5776845216751099,
|
213 |
+
"logits/rejected": 1.9672679901123047,
|
214 |
+
"logps/chosen": -338.0449523925781,
|
215 |
+
"logps/rejected": -449.5287170410156,
|
216 |
+
"loss": 0.3042,
|
217 |
"rewards/accuracies": 0.8812500238418579,
|
218 |
+
"rewards/chosen": -0.9358948469161987,
|
219 |
+
"rewards/margins": 1.7070610523223877,
|
220 |
+
"rewards/rejected": -2.642955780029297,
|
221 |
"step": 120
|
222 |
},
|
223 |
{
|
224 |
"epoch": 0.9059233449477352,
|
225 |
+
"grad_norm": 34.08869226634673,
|
226 |
"learning_rate": 1.261795485174083e-08,
|
227 |
+
"logits/chosen": 0.4366391599178314,
|
228 |
+
"logits/rejected": 1.6738389730453491,
|
229 |
+
"logps/chosen": -295.5234375,
|
230 |
+
"logps/rejected": -462.7455139160156,
|
231 |
+
"loss": 0.3275,
|
232 |
"rewards/accuracies": 0.918749988079071,
|
233 |
+
"rewards/chosen": -0.7988389730453491,
|
234 |
+
"rewards/margins": 1.923356294631958,
|
235 |
+
"rewards/rejected": -2.7221951484680176,
|
236 |
"step": 130
|
237 |
},
|
238 |
{
|
239 |
"epoch": 0.975609756097561,
|
240 |
+
"grad_norm": 33.23571195007149,
|
241 |
"learning_rate": 6.773858303274482e-10,
|
242 |
+
"logits/chosen": 0.5510319471359253,
|
243 |
+
"logits/rejected": 1.6277908086776733,
|
244 |
+
"logps/chosen": -300.76043701171875,
|
245 |
+
"logps/rejected": -455.19793701171875,
|
246 |
+
"loss": 0.3272,
|
247 |
+
"rewards/accuracies": 0.887499988079071,
|
248 |
+
"rewards/chosen": -0.9057596325874329,
|
249 |
+
"rewards/margins": 1.6654990911483765,
|
250 |
+
"rewards/rejected": -2.571258544921875,
|
251 |
"step": 140
|
252 |
},
|
253 |
{
|
254 |
"epoch": 0.9965156794425087,
|
255 |
"step": 143,
|
256 |
"total_flos": 0.0,
|
257 |
+
"train_loss": 0.42797933008287337,
|
258 |
+
"train_runtime": 3631.7108,
|
259 |
+
"train_samples_per_second": 5.05,
|
260 |
+
"train_steps_per_second": 0.039
|
261 |
}
|
262 |
],
|
263 |
"logging_steps": 10,
|