Model save
Browse files- README.md +13 -13
- all_results.json +3 -3
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- train_results.json +3 -3
- trainer_state.json +413 -413
README.md
CHANGED
@@ -17,15 +17,15 @@ should probably proofread and complete it, then remove this comment. -->
|
|
17 |
|
18 |
This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
|
19 |
It achieves the following results on the evaluation set:
|
20 |
-
- Loss: 0.
|
21 |
-
- Rewards/chosen: -2.
|
22 |
-
- Rewards/rejected: -2.
|
23 |
- Rewards/accuracies: 0.625
|
24 |
-
- Rewards/margins: 0.
|
25 |
-
- Logps/rejected: -
|
26 |
-
- Logps/chosen: -
|
27 |
-
- Logits/rejected: -2.
|
28 |
-
- Logits/chosen: -2.
|
29 |
|
30 |
## Model description
|
31 |
|
@@ -47,7 +47,7 @@ The following hyperparameters were used during training:
|
|
47 |
- learning_rate: 5e-07
|
48 |
- train_batch_size: 8
|
49 |
- eval_batch_size: 8
|
50 |
-
- seed:
|
51 |
- distributed_type: multi-GPU
|
52 |
- num_devices: 8
|
53 |
- gradient_accumulation_steps: 2
|
@@ -62,10 +62,10 @@ The following hyperparameters were used during training:
|
|
62 |
|
63 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
64 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
|
70 |
|
71 |
### Framework versions
|
|
|
17 |
|
18 |
This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
|
19 |
It achieves the following results on the evaluation set:
|
20 |
+
- Loss: 0.0660
|
21 |
+
- Rewards/chosen: -2.5606
|
22 |
+
- Rewards/rejected: -2.9549
|
23 |
- Rewards/accuracies: 0.625
|
24 |
+
- Rewards/margins: 0.3944
|
25 |
+
- Logps/rejected: -552.8470
|
26 |
+
- Logps/chosen: -513.0960
|
27 |
+
- Logits/rejected: -2.2459
|
28 |
+
- Logits/chosen: -2.2708
|
29 |
|
30 |
## Model description
|
31 |
|
|
|
47 |
- learning_rate: 5e-07
|
48 |
- train_batch_size: 8
|
49 |
- eval_batch_size: 8
|
50 |
+
- seed: 4
|
51 |
- distributed_type: multi-GPU
|
52 |
- num_devices: 8
|
53 |
- gradient_accumulation_steps: 2
|
|
|
62 |
|
63 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
64 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
65 |
+
| 0.0437 | 0.25 | 100 | 0.0824 | -2.2538 | -2.4741 | 0.5859 | 0.2203 | -504.7590 | -482.4154 | -2.3143 | -2.3260 |
|
66 |
+
| 0.0258 | 0.49 | 200 | 0.0581 | -2.8677 | -3.2192 | 0.5977 | 0.3515 | -579.2755 | -543.8072 | -2.1155 | -2.1394 |
|
67 |
+
| 0.0402 | 0.74 | 300 | 0.0837 | -2.0997 | -2.5006 | 0.6289 | 0.4009 | -507.4115 | -467.0057 | -2.2751 | -2.2980 |
|
68 |
+
| 0.0288 | 0.99 | 400 | 0.0660 | -2.5606 | -2.9549 | 0.625 | 0.3944 | -552.8470 | -513.0960 | -2.2459 | -2.2708 |
|
69 |
|
70 |
|
71 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 51894,
|
6 |
-
"train_samples_per_second": 13.
|
7 |
"train_steps_per_second": 0.108
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.07151281171374851,
|
4 |
+
"train_runtime": 3738.25,
|
5 |
"train_samples": 51894,
|
6 |
+
"train_samples_per_second": 13.882,
|
7 |
"train_steps_per_second": 0.108
|
8 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943162336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb2741318d25fa010663fe61ed02f4f293fa8ad301934c24bbabaf6e60633fb3
|
3 |
size 4943162336
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f559ba79771ca5e5cdbae085a55b0de304927c43a4793b3f8234d1f33152354
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540516344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:901f2833dc2e0f3adb4f4bd18d3a372877da1018c193c779ed31f78d98f1f0a4
|
3 |
size 4540516344
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 51894,
|
6 |
-
"train_samples_per_second": 13.
|
7 |
"train_steps_per_second": 0.108
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.07151281171374851,
|
4 |
+
"train_runtime": 3738.25,
|
5 |
"train_samples": 51894,
|
6 |
+
"train_samples_per_second": 13.882,
|
7 |
"train_steps_per_second": 0.108
|
8 |
}
|
trainer_state.json
CHANGED
@@ -11,11 +11,11 @@
|
|
11 |
{
|
12 |
"epoch": 0.0,
|
13 |
"learning_rate": 1.2195121951219512e-08,
|
14 |
-
"logits/chosen": -2.
|
15 |
-
"logits/rejected": -2.
|
16 |
-
"logps/chosen": -
|
17 |
-
"logps/rejected": -
|
18 |
-
"loss": 0.
|
19 |
"rewards/accuracies": 0.0,
|
20 |
"rewards/chosen": 0.0,
|
21 |
"rewards/margins": 0.0,
|
@@ -25,634 +25,634 @@
|
|
25 |
{
|
26 |
"epoch": 0.02,
|
27 |
"learning_rate": 1.219512195121951e-07,
|
28 |
-
"logits/chosen": -2.
|
29 |
-
"logits/rejected": -2.
|
30 |
-
"logps/chosen": -
|
31 |
-
"logps/rejected": -114.
|
32 |
-
"loss": 0.
|
33 |
-
"rewards/accuracies": 0.
|
34 |
-
"rewards/chosen": 0.
|
35 |
-
"rewards/margins": 0.
|
36 |
-
"rewards/rejected": -0.
|
37 |
"step": 10
|
38 |
},
|
39 |
{
|
40 |
"epoch": 0.05,
|
41 |
"learning_rate": 2.439024390243902e-07,
|
42 |
-
"logits/chosen": -2.
|
43 |
-
"logits/rejected": -2.
|
44 |
-
"logps/chosen": -
|
45 |
-
"logps/rejected": -
|
46 |
-
"loss": 0.
|
47 |
-
"rewards/accuracies": 0.
|
48 |
-
"rewards/chosen": 0.
|
49 |
-
"rewards/margins": 0.
|
50 |
-
"rewards/rejected": -0.
|
51 |
"step": 20
|
52 |
},
|
53 |
{
|
54 |
"epoch": 0.07,
|
55 |
"learning_rate": 3.6585365853658536e-07,
|
56 |
-
"logits/chosen": -2.
|
57 |
-
"logits/rejected": -2.
|
58 |
-
"logps/chosen": -
|
59 |
-
"logps/rejected": -
|
60 |
-
"loss": 0.
|
61 |
-
"rewards/accuracies": 0.
|
62 |
-
"rewards/chosen": 0.
|
63 |
-
"rewards/margins": 0.
|
64 |
-
"rewards/rejected": -0.
|
65 |
"step": 30
|
66 |
},
|
67 |
{
|
68 |
"epoch": 0.1,
|
69 |
"learning_rate": 4.878048780487804e-07,
|
70 |
-
"logits/chosen": -2.
|
71 |
-
"logits/rejected": -2.
|
72 |
-
"logps/chosen": -
|
73 |
-
"logps/rejected": -
|
74 |
-
"loss": 0.
|
75 |
-
"rewards/accuracies": 0.
|
76 |
-
"rewards/chosen": 0.
|
77 |
-
"rewards/margins": 0.
|
78 |
-
"rewards/rejected": -0.
|
79 |
"step": 40
|
80 |
},
|
81 |
{
|
82 |
"epoch": 0.12,
|
83 |
"learning_rate": 4.992461696250783e-07,
|
84 |
-
"logits/chosen": -2.
|
85 |
-
"logits/rejected": -2.
|
86 |
-
"logps/chosen": -
|
87 |
-
"logps/rejected": -
|
88 |
-
"loss": 0.
|
89 |
-
"rewards/accuracies": 0.
|
90 |
-
"rewards/chosen": -0.
|
91 |
-
"rewards/margins": 0.
|
92 |
-
"rewards/rejected": -0.
|
93 |
"step": 50
|
94 |
},
|
95 |
{
|
96 |
"epoch": 0.15,
|
97 |
"learning_rate": 4.966461721767899e-07,
|
98 |
-
"logits/chosen": -2.
|
99 |
-
"logits/rejected": -2.
|
100 |
-
"logps/chosen": -
|
101 |
-
"logps/rejected": -
|
102 |
-
"loss": 0.
|
103 |
-
"rewards/accuracies": 0.
|
104 |
-
"rewards/chosen": -0.
|
105 |
-
"rewards/margins": 0.
|
106 |
-
"rewards/rejected": -1.
|
107 |
"step": 60
|
108 |
},
|
109 |
{
|
110 |
"epoch": 0.17,
|
111 |
"learning_rate": 4.922100518015975e-07,
|
112 |
-
"logits/chosen": -2.
|
113 |
-
"logits/rejected": -2.
|
114 |
-
"logps/chosen": -
|
115 |
-
"logps/rejected": -
|
116 |
-
"loss": 0.
|
117 |
-
"rewards/accuracies": 0.
|
118 |
-
"rewards/chosen": -0.
|
119 |
-
"rewards/margins": 1.
|
120 |
-
"rewards/rejected": -1.
|
121 |
"step": 70
|
122 |
},
|
123 |
{
|
124 |
"epoch": 0.2,
|
125 |
"learning_rate": 4.859708325770919e-07,
|
126 |
-
"logits/chosen": -2.
|
127 |
-
"logits/rejected": -2.
|
128 |
-
"logps/chosen": -472.
|
129 |
-
"logps/rejected": -
|
130 |
-
"loss": 0.
|
131 |
-
"rewards/accuracies": 0.
|
132 |
-
"rewards/chosen": -0.
|
133 |
-
"rewards/margins": 1.
|
134 |
-
"rewards/rejected": -2.
|
135 |
"step": 80
|
136 |
},
|
137 |
{
|
138 |
"epoch": 0.22,
|
139 |
"learning_rate": 4.779749614980225e-07,
|
140 |
-
"logits/chosen": -2.
|
141 |
-
"logits/rejected": -2.
|
142 |
-
"logps/chosen": -
|
143 |
-
"logps/rejected": -
|
144 |
-
"loss": 0.
|
145 |
-
"rewards/accuracies": 0.
|
146 |
-
"rewards/chosen": -0.
|
147 |
-
"rewards/margins": 1.
|
148 |
-
"rewards/rejected": -2.
|
149 |
"step": 90
|
150 |
},
|
151 |
{
|
152 |
"epoch": 0.25,
|
153 |
"learning_rate": 4.682819627081427e-07,
|
154 |
-
"logits/chosen": -2.
|
155 |
-
"logits/rejected": -2.
|
156 |
-
"logps/chosen": -
|
157 |
-
"logps/rejected": -
|
158 |
-
"loss": 0.
|
159 |
-
"rewards/accuracies": 0.
|
160 |
-
"rewards/chosen": -0.
|
161 |
-
"rewards/margins":
|
162 |
-
"rewards/rejected": -2.
|
163 |
"step": 100
|
164 |
},
|
165 |
{
|
166 |
"epoch": 0.25,
|
167 |
-
"eval_logits/chosen": -2.
|
168 |
-
"eval_logits/rejected": -2.
|
169 |
-
"eval_logps/chosen": -
|
170 |
-
"eval_logps/rejected": -
|
171 |
-
"eval_loss": 0.
|
172 |
-
"eval_rewards/accuracies": 0.
|
173 |
-
"eval_rewards/chosen": -
|
174 |
-
"eval_rewards/margins": 0.
|
175 |
-
"eval_rewards/rejected": -
|
176 |
-
"eval_runtime": 53.
|
177 |
-
"eval_samples_per_second": 37.
|
178 |
"eval_steps_per_second": 0.6,
|
179 |
"step": 100
|
180 |
},
|
181 |
{
|
182 |
"epoch": 0.27,
|
183 |
"learning_rate": 4.569639943810477e-07,
|
184 |
-
"logits/chosen": -2.
|
185 |
-
"logits/rejected": -2.
|
186 |
-
"logps/chosen": -
|
187 |
-
"logps/rejected": -
|
188 |
-
"loss": 0.
|
189 |
-
"rewards/accuracies": 0.
|
190 |
-
"rewards/chosen": -
|
191 |
-
"rewards/margins": 1.
|
192 |
-
"rewards/rejected": -
|
193 |
"step": 110
|
194 |
},
|
195 |
{
|
196 |
"epoch": 0.3,
|
197 |
"learning_rate": 4.4410531154874543e-07,
|
198 |
-
"logits/chosen": -2.
|
199 |
-
"logits/rejected": -2.
|
200 |
-
"logps/chosen": -
|
201 |
-
"logps/rejected": -
|
202 |
-
"loss": 0.
|
203 |
-
"rewards/accuracies": 0.
|
204 |
-
"rewards/chosen": -
|
205 |
-
"rewards/margins": 1.
|
206 |
-
"rewards/rejected": -2.
|
207 |
"step": 120
|
208 |
},
|
209 |
{
|
210 |
"epoch": 0.32,
|
211 |
"learning_rate": 4.298016388768561e-07,
|
212 |
-
"logits/chosen": -2.
|
213 |
-
"logits/rejected": -2.
|
214 |
-
"logps/chosen": -
|
215 |
-
"logps/rejected": -
|
216 |
-
"loss": 0.
|
217 |
-
"rewards/accuracies": 0.
|
218 |
-
"rewards/chosen": -
|
219 |
-
"rewards/margins":
|
220 |
-
"rewards/rejected": -2.
|
221 |
"step": 130
|
222 |
},
|
223 |
{
|
224 |
"epoch": 0.35,
|
225 |
"learning_rate": 4.1415945805573005e-07,
|
226 |
-
"logits/chosen": -2.
|
227 |
-
"logits/rejected": -2.
|
228 |
-
"logps/chosen": -
|
229 |
-
"logps/rejected": -
|
230 |
-
"loss": 0.
|
231 |
-
"rewards/accuracies": 0.
|
232 |
-
"rewards/chosen": -
|
233 |
-
"rewards/margins":
|
234 |
-
"rewards/rejected": -
|
235 |
"step": 140
|
236 |
},
|
237 |
{
|
238 |
"epoch": 0.37,
|
239 |
"learning_rate": 3.972952151123984e-07,
|
240 |
-
"logits/chosen": -2.
|
241 |
-
"logits/rejected": -2.
|
242 |
-
"logps/chosen": -
|
243 |
-
"logps/rejected": -
|
244 |
-
"loss": 0.
|
245 |
-
"rewards/accuracies": 0.
|
246 |
-
"rewards/chosen": -
|
247 |
-
"rewards/margins":
|
248 |
-
"rewards/rejected": -
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
"epoch": 0.39,
|
253 |
"learning_rate": 3.793344535444142e-07,
|
254 |
-
"logits/chosen": -2.
|
255 |
-
"logits/rejected": -2.
|
256 |
-
"logps/chosen": -
|
257 |
-
"logps/rejected": -
|
258 |
-
"loss": 0.
|
259 |
-
"rewards/accuracies": 0.
|
260 |
-
"rewards/chosen": -
|
261 |
-
"rewards/margins":
|
262 |
-
"rewards/rejected": -
|
263 |
"step": 160
|
264 |
},
|
265 |
{
|
266 |
"epoch": 0.42,
|
267 |
"learning_rate": 3.604108797288461e-07,
|
268 |
-
"logits/chosen": -2.
|
269 |
-
"logits/rejected": -2.
|
270 |
-
"logps/chosen": -
|
271 |
-
"logps/rejected": -
|
272 |
-
"loss": 0.
|
273 |
-
"rewards/accuracies": 0.
|
274 |
-
"rewards/chosen": -1.
|
275 |
-
"rewards/margins":
|
276 |
-
"rewards/rejected": -
|
277 |
"step": 170
|
278 |
},
|
279 |
{
|
280 |
"epoch": 0.44,
|
281 |
"learning_rate": 3.40665367563858e-07,
|
282 |
-
"logits/chosen": -2.
|
283 |
-
"logits/rejected": -2.
|
284 |
-
"logps/chosen": -
|
285 |
-
"logps/rejected": -
|
286 |
-
"loss": 0.
|
287 |
-
"rewards/accuracies": 0.
|
288 |
-
"rewards/chosen": -
|
289 |
-
"rewards/margins":
|
290 |
-
"rewards/rejected": -3.
|
291 |
"step": 180
|
292 |
},
|
293 |
{
|
294 |
"epoch": 0.47,
|
295 |
"learning_rate": 3.202449097526798e-07,
|
296 |
-
"logits/chosen": -2.
|
297 |
-
"logits/rejected": -2.
|
298 |
-
"logps/chosen": -
|
299 |
-
"logps/rejected": -
|
300 |
-
"loss": 0.
|
301 |
-
"rewards/accuracies": 0.
|
302 |
-
"rewards/chosen": -1.
|
303 |
-
"rewards/margins": 2.
|
304 |
-
"rewards/rejected": -3.
|
305 |
"step": 190
|
306 |
},
|
307 |
{
|
308 |
"epoch": 0.49,
|
309 |
"learning_rate": 2.993015235369905e-07,
|
310 |
-
"logits/chosen": -2.
|
311 |
-
"logits/rejected": -2.
|
312 |
-
"logps/chosen": -
|
313 |
-
"logps/rejected": -
|
314 |
-
"loss": 0.
|
315 |
-
"rewards/accuracies": 0.
|
316 |
-
"rewards/chosen": -1.
|
317 |
-
"rewards/margins":
|
318 |
-
"rewards/rejected": -3.
|
319 |
"step": 200
|
320 |
},
|
321 |
{
|
322 |
"epoch": 0.49,
|
323 |
-
"eval_logits/chosen": -2.
|
324 |
-
"eval_logits/rejected": -2.
|
325 |
-
"eval_logps/chosen": -
|
326 |
-
"eval_logps/rejected": -
|
327 |
-
"eval_loss": 0.
|
328 |
-
"eval_rewards/accuracies": 0.
|
329 |
-
"eval_rewards/chosen": -2.
|
330 |
-
"eval_rewards/margins": 0.
|
331 |
-
"eval_rewards/rejected": -
|
332 |
-
"eval_runtime": 53.
|
333 |
-
"eval_samples_per_second": 37.
|
334 |
-
"eval_steps_per_second": 0.
|
335 |
"step": 200
|
336 |
},
|
337 |
{
|
338 |
"epoch": 0.52,
|
339 |
"learning_rate": 2.7799111902582693e-07,
|
340 |
-
"logits/chosen": -2.
|
341 |
-
"logits/rejected": -2.
|
342 |
-
"logps/chosen": -
|
343 |
-
"logps/rejected": -
|
344 |
-
"loss": 0.
|
345 |
-
"rewards/accuracies": 0.
|
346 |
-
"rewards/chosen": -1.
|
347 |
-
"rewards/margins":
|
348 |
-
"rewards/rejected": -3.
|
349 |
"step": 210
|
350 |
},
|
351 |
{
|
352 |
"epoch": 0.54,
|
353 |
"learning_rate": 2.564723385445869e-07,
|
354 |
-
"logits/chosen": -2.
|
355 |
-
"logits/rejected": -2.
|
356 |
-
"logps/chosen": -
|
357 |
-
"logps/rejected": -
|
358 |
-
"loss": 0.
|
359 |
-
"rewards/accuracies": 0.
|
360 |
-
"rewards/chosen": -1.
|
361 |
-
"rewards/margins":
|
362 |
-
"rewards/rejected": -3.
|
363 |
"step": 220
|
364 |
},
|
365 |
{
|
366 |
"epoch": 0.57,
|
367 |
"learning_rate": 2.3490537564442845e-07,
|
368 |
-
"logits/chosen": -2.
|
369 |
-
"logits/rejected": -2.
|
370 |
-
"logps/chosen": -
|
371 |
-
"logps/rejected": -
|
372 |
-
"loss": 0.
|
373 |
-
"rewards/accuracies": 0.
|
374 |
-
"rewards/chosen": -1.
|
375 |
-
"rewards/margins": 1.
|
376 |
-
"rewards/rejected": -2.
|
377 |
"step": 230
|
378 |
},
|
379 |
{
|
380 |
"epoch": 0.59,
|
381 |
"learning_rate": 2.1345078256378801e-07,
|
382 |
-
"logits/chosen": -2.
|
383 |
-
"logits/rejected": -2.
|
384 |
-
"logps/chosen": -
|
385 |
-
"logps/rejected": -
|
386 |
-
"loss": 0.
|
387 |
-
"rewards/accuracies": 0.
|
388 |
-
"rewards/chosen": -1.
|
389 |
-
"rewards/margins":
|
390 |
-
"rewards/rejected": -3.
|
391 |
"step": 240
|
392 |
},
|
393 |
{
|
394 |
"epoch": 0.62,
|
395 |
"learning_rate": 1.9226827501969865e-07,
|
396 |
-
"logits/chosen": -2.
|
397 |
-
"logits/rejected": -2.
|
398 |
-
"logps/chosen": -
|
399 |
-
"logps/rejected": -
|
400 |
-
"loss": 0.
|
401 |
-
"rewards/accuracies": 0.
|
402 |
-
"rewards/chosen": -1.
|
403 |
-
"rewards/margins": 2.
|
404 |
-
"rewards/rejected": -3.
|
405 |
"step": 250
|
406 |
},
|
407 |
{
|
408 |
"epoch": 0.64,
|
409 |
"learning_rate": 1.715155432264775e-07,
|
410 |
-
"logits/chosen": -2.
|
411 |
-
"logits/rejected": -2.
|
412 |
-
"logps/chosen": -
|
413 |
-
"logps/rejected": -
|
414 |
-
"loss": 0.
|
415 |
-
"rewards/accuracies": 0.
|
416 |
-
"rewards/chosen": -1.
|
417 |
-
"rewards/margins": 2.
|
418 |
-
"rewards/rejected": -3.
|
419 |
"step": 260
|
420 |
},
|
421 |
{
|
422 |
"epoch": 0.67,
|
423 |
"learning_rate": 1.51347077992983e-07,
|
424 |
-
"logits/chosen": -2.
|
425 |
-
"logits/rejected": -2.
|
426 |
-
"logps/chosen": -
|
427 |
-
"logps/rejected": -
|
428 |
-
"loss": 0.
|
429 |
-
"rewards/accuracies": 0.
|
430 |
-
"rewards/chosen": -1.
|
431 |
-
"rewards/margins": 1.
|
432 |
-
"rewards/rejected": -3.
|
433 |
"step": 270
|
434 |
},
|
435 |
{
|
436 |
"epoch": 0.69,
|
437 |
"learning_rate": 1.3191302063739906e-07,
|
438 |
-
"logits/chosen": -2.
|
439 |
-
"logits/rejected": -2.
|
440 |
-
"logps/chosen": -
|
441 |
-
"logps/rejected": -
|
442 |
-
"loss": 0.
|
443 |
-
"rewards/accuracies": 0.
|
444 |
-
"rewards/chosen": -1.
|
445 |
-
"rewards/margins": 1.
|
446 |
-
"rewards/rejected": -3.
|
447 |
"step": 280
|
448 |
},
|
449 |
{
|
450 |
"epoch": 0.72,
|
451 |
"learning_rate": 1.1335804528119475e-07,
|
452 |
-
"logits/chosen": -2.
|
453 |
-
"logits/rejected": -2.
|
454 |
-
"logps/chosen": -
|
455 |
-
"logps/rejected": -
|
456 |
-
"loss": 0.
|
457 |
-
"rewards/accuracies": 0.
|
458 |
-
"rewards/chosen": -1.
|
459 |
-
"rewards/margins": 2.
|
460 |
-
"rewards/rejected": -3.
|
461 |
"step": 290
|
462 |
},
|
463 |
{
|
464 |
"epoch": 0.74,
|
465 |
"learning_rate": 9.582028184286423e-08,
|
466 |
-
"logits/chosen": -2.
|
467 |
-
"logits/rejected": -2.
|
468 |
-
"logps/chosen": -
|
469 |
-
"logps/rejected": -
|
470 |
-
"loss": 0.
|
471 |
-
"rewards/accuracies": 0.
|
472 |
-
"rewards/chosen": -1.
|
473 |
-
"rewards/margins":
|
474 |
-
"rewards/rejected": -3.
|
475 |
"step": 300
|
476 |
},
|
477 |
{
|
478 |
"epoch": 0.74,
|
479 |
-
"eval_logits/chosen": -2.
|
480 |
-
"eval_logits/rejected": -2.
|
481 |
-
"eval_logps/chosen": -
|
482 |
-
"eval_logps/rejected": -
|
483 |
-
"eval_loss": 0.
|
484 |
-
"eval_rewards/accuracies": 0.
|
485 |
-
"eval_rewards/chosen": -2.
|
486 |
-
"eval_rewards/margins": 0.
|
487 |
-
"eval_rewards/rejected": -2.
|
488 |
-
"eval_runtime": 53.
|
489 |
-
"eval_samples_per_second": 37.
|
490 |
"eval_steps_per_second": 0.6,
|
491 |
"step": 300
|
492 |
},
|
493 |
{
|
494 |
"epoch": 0.76,
|
495 |
"learning_rate": 7.943028774907065e-08,
|
496 |
-
"logits/chosen": -2.
|
497 |
-
"logits/rejected": -2.
|
498 |
-
"logps/chosen": -524.
|
499 |
-
"logps/rejected": -
|
500 |
-
"loss": 0.
|
501 |
-
"rewards/accuracies": 0.
|
502 |
-
"rewards/chosen": -1.
|
503 |
-
"rewards/margins":
|
504 |
-
"rewards/rejected": -3.
|
505 |
"step": 310
|
506 |
},
|
507 |
{
|
508 |
"epoch": 0.79,
|
509 |
"learning_rate": 6.431007601814637e-08,
|
510 |
-
"logits/chosen": -2.
|
511 |
-
"logits/rejected": -2.
|
512 |
-
"logps/chosen": -
|
513 |
-
"logps/rejected": -
|
514 |
-
"loss": 0.
|
515 |
-
"rewards/accuracies": 0.
|
516 |
-
"rewards/chosen": -1.
|
517 |
-
"rewards/margins": 1.
|
518 |
-
"rewards/rejected": -3.
|
519 |
"step": 320
|
520 |
},
|
521 |
{
|
522 |
"epoch": 0.81,
|
523 |
"learning_rate": 5.0572206951246e-08,
|
524 |
-
"logits/chosen": -2.
|
525 |
-
"logits/rejected": -2.
|
526 |
-
"logps/chosen": -
|
527 |
-
"logps/rejected": -
|
528 |
-
"loss": 0.
|
529 |
-
"rewards/accuracies": 0.
|
530 |
-
"rewards/chosen": -1.
|
531 |
-
"rewards/margins":
|
532 |
-
"rewards/rejected": -3.
|
533 |
"step": 330
|
534 |
},
|
535 |
{
|
536 |
"epoch": 0.84,
|
537 |
"learning_rate": 3.831895019292897e-08,
|
538 |
-
"logits/chosen": -2.
|
539 |
-
"logits/rejected": -2.
|
540 |
-
"logps/chosen": -
|
541 |
-
"logps/rejected": -
|
542 |
-
"loss": 0.
|
543 |
-
"rewards/accuracies": 0.
|
544 |
-
"rewards/chosen": -1.
|
545 |
-
"rewards/margins": 2.
|
546 |
-
"rewards/rejected": -3.
|
547 |
"step": 340
|
548 |
},
|
549 |
{
|
550 |
"epoch": 0.86,
|
551 |
"learning_rate": 2.764152339909756e-08,
|
552 |
-
"logits/chosen": -2.
|
553 |
-
"logits/rejected": -2.
|
554 |
-
"logps/chosen": -
|
555 |
-
"logps/rejected": -
|
556 |
-
"loss": 0.
|
557 |
-
"rewards/accuracies": 0.
|
558 |
-
"rewards/chosen": -1.
|
559 |
-
"rewards/margins":
|
560 |
-
"rewards/rejected": -3.
|
561 |
"step": 350
|
562 |
},
|
563 |
{
|
564 |
"epoch": 0.89,
|
565 |
"learning_rate": 1.861941317991664e-08,
|
566 |
-
"logits/chosen": -2.
|
567 |
-
"logits/rejected": -2.
|
568 |
-
"logps/chosen": -
|
569 |
-
"logps/rejected": -
|
570 |
-
"loss": 0.
|
571 |
-
"rewards/accuracies": 0.
|
572 |
-
"rewards/chosen": -1.
|
573 |
-
"rewards/margins": 2.
|
574 |
-
"rewards/rejected": -3.
|
575 |
"step": 360
|
576 |
},
|
577 |
{
|
578 |
"epoch": 0.91,
|
579 |
"learning_rate": 1.13197833728636e-08,
|
580 |
-
"logits/chosen": -2.
|
581 |
-
"logits/rejected": -2.
|
582 |
-
"logps/chosen": -
|
583 |
-
"logps/rejected": -
|
584 |
-
"loss": 0.
|
585 |
-
"rewards/accuracies": 0.
|
586 |
-
"rewards/chosen": -1.
|
587 |
-
"rewards/margins": 2.
|
588 |
-
"rewards/rejected": -3.
|
589 |
"step": 370
|
590 |
},
|
591 |
{
|
592 |
"epoch": 0.94,
|
593 |
"learning_rate": 5.79697505093521e-09,
|
594 |
-
"logits/chosen": -2.
|
595 |
-
"logits/rejected": -2.
|
596 |
-
"logps/chosen": -
|
597 |
-
"logps/rejected": -
|
598 |
-
"loss": 0.
|
599 |
-
"rewards/accuracies": 0.
|
600 |
-
"rewards/chosen": -1.
|
601 |
-
"rewards/margins":
|
602 |
-
"rewards/rejected": -3.
|
603 |
"step": 380
|
604 |
},
|
605 |
{
|
606 |
"epoch": 0.96,
|
607 |
"learning_rate": 2.092101988131256e-09,
|
608 |
-
"logits/chosen": -2.
|
609 |
-
"logits/rejected": -2.
|
610 |
-
"logps/chosen": -
|
611 |
-
"logps/rejected": -
|
612 |
-
"loss": 0.
|
613 |
-
"rewards/accuracies": 0.
|
614 |
-
"rewards/chosen": -1.
|
615 |
-
"rewards/margins": 2.
|
616 |
-
"rewards/rejected": -3.
|
617 |
"step": 390
|
618 |
},
|
619 |
{
|
620 |
"epoch": 0.99,
|
621 |
"learning_rate": 2.327445937151673e-10,
|
622 |
-
"logits/chosen": -2.
|
623 |
-
"logits/rejected": -2.
|
624 |
-
"logps/chosen": -
|
625 |
-
"logps/rejected": -
|
626 |
-
"loss": 0.
|
627 |
-
"rewards/accuracies": 0.
|
628 |
-
"rewards/chosen": -1.
|
629 |
-
"rewards/margins": 2.
|
630 |
-
"rewards/rejected": -3.
|
631 |
"step": 400
|
632 |
},
|
633 |
{
|
634 |
"epoch": 0.99,
|
635 |
-
"eval_logits/chosen": -2.
|
636 |
-
"eval_logits/rejected": -2.
|
637 |
-
"eval_logps/chosen": -
|
638 |
-
"eval_logps/rejected": -
|
639 |
-
"eval_loss": 0.
|
640 |
"eval_rewards/accuracies": 0.625,
|
641 |
-
"eval_rewards/chosen": -2.
|
642 |
-
"eval_rewards/margins": 0.
|
643 |
-
"eval_rewards/rejected": -2.
|
644 |
-
"eval_runtime": 53.
|
645 |
-
"eval_samples_per_second": 37.
|
646 |
-
"eval_steps_per_second": 0.
|
647 |
"step": 400
|
648 |
},
|
649 |
{
|
650 |
"epoch": 1.0,
|
651 |
"step": 405,
|
652 |
"total_flos": 0.0,
|
653 |
-
"train_loss": 0.
|
654 |
-
"train_runtime":
|
655 |
-
"train_samples_per_second": 13.
|
656 |
"train_steps_per_second": 0.108
|
657 |
}
|
658 |
],
|
|
|
11 |
{
|
12 |
"epoch": 0.0,
|
13 |
"learning_rate": 1.2195121951219512e-08,
|
14 |
+
"logits/chosen": -2.8681135177612305,
|
15 |
+
"logits/rejected": -2.8858838081359863,
|
16 |
+
"logps/chosen": -518.1907958984375,
|
17 |
+
"logps/rejected": -109.31971740722656,
|
18 |
+
"loss": 0.3475,
|
19 |
"rewards/accuracies": 0.0,
|
20 |
"rewards/chosen": 0.0,
|
21 |
"rewards/margins": 0.0,
|
|
|
25 |
{
|
26 |
"epoch": 0.02,
|
27 |
"learning_rate": 1.219512195121951e-07,
|
28 |
+
"logits/chosen": -2.7986178398132324,
|
29 |
+
"logits/rejected": -2.752176284790039,
|
30 |
+
"logps/chosen": -434.208251953125,
|
31 |
+
"logps/rejected": -114.19618225097656,
|
32 |
+
"loss": 0.3394,
|
33 |
+
"rewards/accuracies": 0.4930555522441864,
|
34 |
+
"rewards/chosen": 0.0002649651141837239,
|
35 |
+
"rewards/margins": 0.0009347840095870197,
|
36 |
+
"rewards/rejected": -0.0006698188371956348,
|
37 |
"step": 10
|
38 |
},
|
39 |
{
|
40 |
"epoch": 0.05,
|
41 |
"learning_rate": 2.439024390243902e-07,
|
42 |
+
"logits/chosen": -2.8215415477752686,
|
43 |
+
"logits/rejected": -2.7983882427215576,
|
44 |
+
"logps/chosen": -417.2633361816406,
|
45 |
+
"logps/rejected": -118.0062026977539,
|
46 |
+
"loss": 0.3373,
|
47 |
+
"rewards/accuracies": 0.7250000238418579,
|
48 |
+
"rewards/chosen": 0.019945567473769188,
|
49 |
+
"rewards/margins": 0.03575458750128746,
|
50 |
+
"rewards/rejected": -0.015809018164873123,
|
51 |
"step": 20
|
52 |
},
|
53 |
{
|
54 |
"epoch": 0.07,
|
55 |
"learning_rate": 3.6585365853658536e-07,
|
56 |
+
"logits/chosen": -2.6574862003326416,
|
57 |
+
"logits/rejected": -2.6451315879821777,
|
58 |
+
"logps/chosen": -398.87353515625,
|
59 |
+
"logps/rejected": -125.69970703125,
|
60 |
+
"loss": 0.3045,
|
61 |
+
"rewards/accuracies": 0.7562500238418579,
|
62 |
+
"rewards/chosen": 0.07569055259227753,
|
63 |
+
"rewards/margins": 0.19884702563285828,
|
64 |
+
"rewards/rejected": -0.12315647304058075,
|
65 |
"step": 30
|
66 |
},
|
67 |
{
|
68 |
"epoch": 0.1,
|
69 |
"learning_rate": 4.878048780487804e-07,
|
70 |
+
"logits/chosen": -2.54256272315979,
|
71 |
+
"logits/rejected": -2.5281729698181152,
|
72 |
+
"logps/chosen": -384.5321044921875,
|
73 |
+
"logps/rejected": -168.55758666992188,
|
74 |
+
"loss": 0.2564,
|
75 |
+
"rewards/accuracies": 0.737500011920929,
|
76 |
+
"rewards/chosen": 0.024631643667817116,
|
77 |
+
"rewards/margins": 0.41851943731307983,
|
78 |
+
"rewards/rejected": -0.39388787746429443,
|
79 |
"step": 40
|
80 |
},
|
81 |
{
|
82 |
"epoch": 0.12,
|
83 |
"learning_rate": 4.992461696250783e-07,
|
84 |
+
"logits/chosen": -2.4257261753082275,
|
85 |
+
"logits/rejected": -2.3928446769714355,
|
86 |
+
"logps/chosen": -436.45330810546875,
|
87 |
+
"logps/rejected": -219.0617218017578,
|
88 |
+
"loss": 0.1809,
|
89 |
+
"rewards/accuracies": 0.7749999761581421,
|
90 |
+
"rewards/chosen": -0.1671580970287323,
|
91 |
+
"rewards/margins": 0.7879143953323364,
|
92 |
+
"rewards/rejected": -0.9550724029541016,
|
93 |
"step": 50
|
94 |
},
|
95 |
{
|
96 |
"epoch": 0.15,
|
97 |
"learning_rate": 4.966461721767899e-07,
|
98 |
+
"logits/chosen": -2.3805835247039795,
|
99 |
+
"logits/rejected": -2.3364853858947754,
|
100 |
+
"logps/chosen": -437.4466857910156,
|
101 |
+
"logps/rejected": -240.6685791015625,
|
102 |
+
"loss": 0.1377,
|
103 |
+
"rewards/accuracies": 0.768750011920929,
|
104 |
+
"rewards/chosen": -0.32454290986061096,
|
105 |
+
"rewards/margins": 0.9316140413284302,
|
106 |
+
"rewards/rejected": -1.2561569213867188,
|
107 |
"step": 60
|
108 |
},
|
109 |
{
|
110 |
"epoch": 0.17,
|
111 |
"learning_rate": 4.922100518015975e-07,
|
112 |
+
"logits/chosen": -2.3752458095550537,
|
113 |
+
"logits/rejected": -2.3281030654907227,
|
114 |
+
"logps/chosen": -419.6747131347656,
|
115 |
+
"logps/rejected": -264.75787353515625,
|
116 |
+
"loss": 0.103,
|
117 |
+
"rewards/accuracies": 0.7562500238418579,
|
118 |
+
"rewards/chosen": -0.3937300443649292,
|
119 |
+
"rewards/margins": 1.1917842626571655,
|
120 |
+
"rewards/rejected": -1.5855143070220947,
|
121 |
"step": 70
|
122 |
},
|
123 |
{
|
124 |
"epoch": 0.2,
|
125 |
"learning_rate": 4.859708325770919e-07,
|
126 |
+
"logits/chosen": -2.4320530891418457,
|
127 |
+
"logits/rejected": -2.3738579750061035,
|
128 |
+
"logps/chosen": -472.10479736328125,
|
129 |
+
"logps/rejected": -330.32403564453125,
|
130 |
+
"loss": 0.0674,
|
131 |
+
"rewards/accuracies": 0.8187500238418579,
|
132 |
+
"rewards/chosen": -0.5765678286552429,
|
133 |
+
"rewards/margins": 1.5421369075775146,
|
134 |
+
"rewards/rejected": -2.1187047958374023,
|
135 |
"step": 80
|
136 |
},
|
137 |
{
|
138 |
"epoch": 0.22,
|
139 |
"learning_rate": 4.779749614980225e-07,
|
140 |
+
"logits/chosen": -2.3991949558258057,
|
141 |
+
"logits/rejected": -2.357053279876709,
|
142 |
+
"logps/chosen": -487.83074951171875,
|
143 |
+
"logps/rejected": -349.1925354003906,
|
144 |
+
"loss": 0.0553,
|
145 |
+
"rewards/accuracies": 0.8062499761581421,
|
146 |
+
"rewards/chosen": -0.666537880897522,
|
147 |
+
"rewards/margins": 1.7182201147079468,
|
148 |
+
"rewards/rejected": -2.3847577571868896,
|
149 |
"step": 90
|
150 |
},
|
151 |
{
|
152 |
"epoch": 0.25,
|
153 |
"learning_rate": 4.682819627081427e-07,
|
154 |
+
"logits/chosen": -2.3752927780151367,
|
155 |
+
"logits/rejected": -2.326216220855713,
|
156 |
+
"logps/chosen": -515.1549682617188,
|
157 |
+
"logps/rejected": -378.8877868652344,
|
158 |
+
"loss": 0.0437,
|
159 |
+
"rewards/accuracies": 0.862500011920929,
|
160 |
+
"rewards/chosen": -0.6667075157165527,
|
161 |
+
"rewards/margins": 2.000246524810791,
|
162 |
+
"rewards/rejected": -2.666954278945923,
|
163 |
"step": 100
|
164 |
},
|
165 |
{
|
166 |
"epoch": 0.25,
|
167 |
+
"eval_logits/chosen": -2.3259778022766113,
|
168 |
+
"eval_logits/rejected": -2.314302682876587,
|
169 |
+
"eval_logps/chosen": -482.4153747558594,
|
170 |
+
"eval_logps/rejected": -504.759033203125,
|
171 |
+
"eval_loss": 0.08243285864591599,
|
172 |
+
"eval_rewards/accuracies": 0.5859375,
|
173 |
+
"eval_rewards/chosen": -2.2537574768066406,
|
174 |
+
"eval_rewards/margins": 0.22029951214790344,
|
175 |
+
"eval_rewards/rejected": -2.4740567207336426,
|
176 |
+
"eval_runtime": 53.3582,
|
177 |
+
"eval_samples_per_second": 37.483,
|
178 |
"eval_steps_per_second": 0.6,
|
179 |
"step": 100
|
180 |
},
|
181 |
{
|
182 |
"epoch": 0.27,
|
183 |
"learning_rate": 4.569639943810477e-07,
|
184 |
+
"logits/chosen": -2.3144371509552,
|
185 |
+
"logits/rejected": -2.2340025901794434,
|
186 |
+
"logps/chosen": -490.12921142578125,
|
187 |
+
"logps/rejected": -419.07867431640625,
|
188 |
+
"loss": 0.0359,
|
189 |
+
"rewards/accuracies": 0.737500011920929,
|
190 |
+
"rewards/chosen": -1.3260681629180908,
|
191 |
+
"rewards/margins": 1.7610738277435303,
|
192 |
+
"rewards/rejected": -3.0871422290802,
|
193 |
"step": 110
|
194 |
},
|
195 |
{
|
196 |
"epoch": 0.3,
|
197 |
"learning_rate": 4.4410531154874543e-07,
|
198 |
+
"logits/chosen": -2.3716444969177246,
|
199 |
+
"logits/rejected": -2.3235533237457275,
|
200 |
+
"logps/chosen": -466.01702880859375,
|
201 |
+
"logps/rejected": -356.8735046386719,
|
202 |
+
"loss": 0.0575,
|
203 |
+
"rewards/accuracies": 0.7875000238418579,
|
204 |
+
"rewards/chosen": -0.7180399298667908,
|
205 |
+
"rewards/margins": 1.6505486965179443,
|
206 |
+
"rewards/rejected": -2.368588924407959,
|
207 |
"step": 120
|
208 |
},
|
209 |
{
|
210 |
"epoch": 0.32,
|
211 |
"learning_rate": 4.298016388768561e-07,
|
212 |
+
"logits/chosen": -2.3074584007263184,
|
213 |
+
"logits/rejected": -2.257930040359497,
|
214 |
+
"logps/chosen": -472.1845703125,
|
215 |
+
"logps/rejected": -373.66522216796875,
|
216 |
+
"loss": 0.0498,
|
217 |
+
"rewards/accuracies": 0.768750011920929,
|
218 |
+
"rewards/chosen": -1.0088322162628174,
|
219 |
+
"rewards/margins": 1.6445964574813843,
|
220 |
+
"rewards/rejected": -2.653428792953491,
|
221 |
"step": 130
|
222 |
},
|
223 |
{
|
224 |
"epoch": 0.35,
|
225 |
"learning_rate": 4.1415945805573005e-07,
|
226 |
+
"logits/chosen": -2.225804328918457,
|
227 |
+
"logits/rejected": -2.15400767326355,
|
228 |
+
"logps/chosen": -534.1700439453125,
|
229 |
+
"logps/rejected": -430.3104553222656,
|
230 |
+
"loss": 0.0361,
|
231 |
+
"rewards/accuracies": 0.8500000238418579,
|
232 |
+
"rewards/chosen": -1.042690634727478,
|
233 |
+
"rewards/margins": 2.092653751373291,
|
234 |
+
"rewards/rejected": -3.1353445053100586,
|
235 |
"step": 140
|
236 |
},
|
237 |
{
|
238 |
"epoch": 0.37,
|
239 |
"learning_rate": 3.972952151123984e-07,
|
240 |
+
"logits/chosen": -2.2562787532806396,
|
241 |
+
"logits/rejected": -2.164506673812866,
|
242 |
+
"logps/chosen": -522.7659912109375,
|
243 |
+
"logps/rejected": -425.18109130859375,
|
244 |
+
"loss": 0.0344,
|
245 |
+
"rewards/accuracies": 0.8500000238418579,
|
246 |
+
"rewards/chosen": -1.1226718425750732,
|
247 |
+
"rewards/margins": 2.0036892890930176,
|
248 |
+
"rewards/rejected": -3.12636137008667,
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
"epoch": 0.39,
|
253 |
"learning_rate": 3.793344535444142e-07,
|
254 |
+
"logits/chosen": -2.267565965652466,
|
255 |
+
"logits/rejected": -2.1969974040985107,
|
256 |
+
"logps/chosen": -530.3189086914062,
|
257 |
+
"logps/rejected": -426.72332763671875,
|
258 |
+
"loss": 0.0393,
|
259 |
+
"rewards/accuracies": 0.768750011920929,
|
260 |
+
"rewards/chosen": -1.2129985094070435,
|
261 |
+
"rewards/margins": 1.8231168985366821,
|
262 |
+
"rewards/rejected": -3.0361156463623047,
|
263 |
"step": 160
|
264 |
},
|
265 |
{
|
266 |
"epoch": 0.42,
|
267 |
"learning_rate": 3.604108797288461e-07,
|
268 |
+
"logits/chosen": -2.237342119216919,
|
269 |
+
"logits/rejected": -2.1961898803710938,
|
270 |
+
"logps/chosen": -448.13812255859375,
|
271 |
+
"logps/rejected": -372.9068298339844,
|
272 |
+
"loss": 0.0465,
|
273 |
+
"rewards/accuracies": 0.768750011920929,
|
274 |
+
"rewards/chosen": -1.092370629310608,
|
275 |
+
"rewards/margins": 1.5985119342803955,
|
276 |
+
"rewards/rejected": -2.690882444381714,
|
277 |
"step": 170
|
278 |
},
|
279 |
{
|
280 |
"epoch": 0.44,
|
281 |
"learning_rate": 3.40665367563858e-07,
|
282 |
+
"logits/chosen": -2.2571911811828613,
|
283 |
+
"logits/rejected": -2.140353202819824,
|
284 |
+
"logps/chosen": -548.1529541015625,
|
285 |
+
"logps/rejected": -449.4532165527344,
|
286 |
+
"loss": 0.035,
|
287 |
+
"rewards/accuracies": 0.8187500238418579,
|
288 |
+
"rewards/chosen": -0.8807584643363953,
|
289 |
+
"rewards/margins": 2.355053663253784,
|
290 |
+
"rewards/rejected": -3.2358124256134033,
|
291 |
"step": 180
|
292 |
},
|
293 |
{
|
294 |
"epoch": 0.47,
|
295 |
"learning_rate": 3.202449097526798e-07,
|
296 |
+
"logits/chosen": -2.1954236030578613,
|
297 |
+
"logits/rejected": -2.113832950592041,
|
298 |
+
"logps/chosen": -545.7277221679688,
|
299 |
+
"logps/rejected": -466.76580810546875,
|
300 |
+
"loss": 0.029,
|
301 |
+
"rewards/accuracies": 0.768750011920929,
|
302 |
+
"rewards/chosen": -1.4056795835494995,
|
303 |
+
"rewards/margins": 2.1022555828094482,
|
304 |
+
"rewards/rejected": -3.5079357624053955,
|
305 |
"step": 190
|
306 |
},
|
307 |
{
|
308 |
"epoch": 0.49,
|
309 |
"learning_rate": 2.993015235369905e-07,
|
310 |
+
"logits/chosen": -2.1386027336120605,
|
311 |
+
"logits/rejected": -2.0572166442871094,
|
312 |
+
"logps/chosen": -560.2534790039062,
|
313 |
+
"logps/rejected": -491.8816833496094,
|
314 |
+
"loss": 0.0258,
|
315 |
+
"rewards/accuracies": 0.7437499761581421,
|
316 |
+
"rewards/chosen": -1.810624361038208,
|
317 |
+
"rewards/margins": 1.9691530466079712,
|
318 |
+
"rewards/rejected": -3.7797775268554688,
|
319 |
"step": 200
|
320 |
},
|
321 |
{
|
322 |
"epoch": 0.49,
|
323 |
+
"eval_logits/chosen": -2.1394448280334473,
|
324 |
+
"eval_logits/rejected": -2.1155476570129395,
|
325 |
+
"eval_logps/chosen": -543.8071899414062,
|
326 |
+
"eval_logps/rejected": -579.2755126953125,
|
327 |
+
"eval_loss": 0.0581156425178051,
|
328 |
+
"eval_rewards/accuracies": 0.59765625,
|
329 |
+
"eval_rewards/chosen": -2.86767578125,
|
330 |
+
"eval_rewards/margins": 0.35154610872268677,
|
331 |
+
"eval_rewards/rejected": -3.219222068786621,
|
332 |
+
"eval_runtime": 53.2701,
|
333 |
+
"eval_samples_per_second": 37.545,
|
334 |
+
"eval_steps_per_second": 0.601,
|
335 |
"step": 200
|
336 |
},
|
337 |
{
|
338 |
"epoch": 0.52,
|
339 |
"learning_rate": 2.7799111902582693e-07,
|
340 |
+
"logits/chosen": -2.1782305240631104,
|
341 |
+
"logits/rejected": -2.044674873352051,
|
342 |
+
"logps/chosen": -579.908935546875,
|
343 |
+
"logps/rejected": -500.6641540527344,
|
344 |
+
"loss": 0.0219,
|
345 |
+
"rewards/accuracies": 0.8187500238418579,
|
346 |
+
"rewards/chosen": -1.408044695854187,
|
347 |
+
"rewards/margins": 2.4992563724517822,
|
348 |
+
"rewards/rejected": -3.9073009490966797,
|
349 |
"step": 210
|
350 |
},
|
351 |
{
|
352 |
"epoch": 0.54,
|
353 |
"learning_rate": 2.564723385445869e-07,
|
354 |
+
"logits/chosen": -2.2589755058288574,
|
355 |
+
"logits/rejected": -2.156228542327881,
|
356 |
+
"logps/chosen": -563.1976318359375,
|
357 |
+
"logps/rejected": -475.75030517578125,
|
358 |
+
"loss": 0.038,
|
359 |
+
"rewards/accuracies": 0.762499988079071,
|
360 |
+
"rewards/chosen": -1.3078866004943848,
|
361 |
+
"rewards/margins": 2.1681323051452637,
|
362 |
+
"rewards/rejected": -3.4760184288024902,
|
363 |
"step": 220
|
364 |
},
|
365 |
{
|
366 |
"epoch": 0.57,
|
367 |
"learning_rate": 2.3490537564442845e-07,
|
368 |
+
"logits/chosen": -2.2288191318511963,
|
369 |
+
"logits/rejected": -2.136579751968384,
|
370 |
+
"logps/chosen": -507.54632568359375,
|
371 |
+
"logps/rejected": -419.88470458984375,
|
372 |
+
"loss": 0.0432,
|
373 |
+
"rewards/accuracies": 0.737500011920929,
|
374 |
+
"rewards/chosen": -1.2000774145126343,
|
375 |
+
"rewards/margins": 1.7510545253753662,
|
376 |
+
"rewards/rejected": -2.951131820678711,
|
377 |
"step": 230
|
378 |
},
|
379 |
{
|
380 |
"epoch": 0.59,
|
381 |
"learning_rate": 2.1345078256378801e-07,
|
382 |
+
"logits/chosen": -2.282217264175415,
|
383 |
+
"logits/rejected": -2.1927459239959717,
|
384 |
+
"logps/chosen": -539.92822265625,
|
385 |
+
"logps/rejected": -433.8241271972656,
|
386 |
+
"loss": 0.0373,
|
387 |
+
"rewards/accuracies": 0.7749999761581421,
|
388 |
+
"rewards/chosen": -1.2784963846206665,
|
389 |
+
"rewards/margins": 1.8950881958007812,
|
390 |
+
"rewards/rejected": -3.1735846996307373,
|
391 |
"step": 240
|
392 |
},
|
393 |
{
|
394 |
"epoch": 0.62,
|
395 |
"learning_rate": 1.9226827501969865e-07,
|
396 |
+
"logits/chosen": -2.2803092002868652,
|
397 |
+
"logits/rejected": -2.1990160942077637,
|
398 |
+
"logps/chosen": -537.9136962890625,
|
399 |
+
"logps/rejected": -442.28350830078125,
|
400 |
+
"loss": 0.04,
|
401 |
+
"rewards/accuracies": 0.800000011920929,
|
402 |
+
"rewards/chosen": -1.2320274114608765,
|
403 |
+
"rewards/margins": 2.1027939319610596,
|
404 |
+
"rewards/rejected": -3.3348212242126465,
|
405 |
"step": 250
|
406 |
},
|
407 |
{
|
408 |
"epoch": 0.64,
|
409 |
"learning_rate": 1.715155432264775e-07,
|
410 |
+
"logits/chosen": -2.2646355628967285,
|
411 |
+
"logits/rejected": -2.14613676071167,
|
412 |
+
"logps/chosen": -502.49664306640625,
|
413 |
+
"logps/rejected": -420.11004638671875,
|
414 |
+
"loss": 0.0396,
|
415 |
+
"rewards/accuracies": 0.800000011920929,
|
416 |
+
"rewards/chosen": -1.1264328956604004,
|
417 |
+
"rewards/margins": 2.006878614425659,
|
418 |
+
"rewards/rejected": -3.1333117485046387,
|
419 |
"step": 260
|
420 |
},
|
421 |
{
|
422 |
"epoch": 0.67,
|
423 |
"learning_rate": 1.51347077992983e-07,
|
424 |
+
"logits/chosen": -2.3088698387145996,
|
425 |
+
"logits/rejected": -2.2018628120422363,
|
426 |
+
"logps/chosen": -554.0256958007812,
|
427 |
+
"logps/rejected": -421.2101135253906,
|
428 |
+
"loss": 0.0375,
|
429 |
+
"rewards/accuracies": 0.831250011920929,
|
430 |
+
"rewards/chosen": -1.10258948802948,
|
431 |
+
"rewards/margins": 1.9626919031143188,
|
432 |
+
"rewards/rejected": -3.065281391143799,
|
433 |
"step": 270
|
434 |
},
|
435 |
{
|
436 |
"epoch": 0.69,
|
437 |
"learning_rate": 1.3191302063739906e-07,
|
438 |
+
"logits/chosen": -2.310133457183838,
|
439 |
+
"logits/rejected": -2.216827392578125,
|
440 |
+
"logps/chosen": -522.3606567382812,
|
441 |
+
"logps/rejected": -438.058349609375,
|
442 |
+
"loss": 0.043,
|
443 |
+
"rewards/accuracies": 0.800000011920929,
|
444 |
+
"rewards/chosen": -1.1809624433517456,
|
445 |
+
"rewards/margins": 1.9291051626205444,
|
446 |
+
"rewards/rejected": -3.110067844390869,
|
447 |
"step": 280
|
448 |
},
|
449 |
{
|
450 |
"epoch": 0.72,
|
451 |
"learning_rate": 1.1335804528119475e-07,
|
452 |
+
"logits/chosen": -2.3108785152435303,
|
453 |
+
"logits/rejected": -2.2141172885894775,
|
454 |
+
"logps/chosen": -544.7510986328125,
|
455 |
+
"logps/rejected": -427.60150146484375,
|
456 |
+
"loss": 0.044,
|
457 |
+
"rewards/accuracies": 0.831250011920929,
|
458 |
+
"rewards/chosen": -1.00501549243927,
|
459 |
+
"rewards/margins": 2.146829605102539,
|
460 |
+
"rewards/rejected": -3.1518452167510986,
|
461 |
"step": 290
|
462 |
},
|
463 |
{
|
464 |
"epoch": 0.74,
|
465 |
"learning_rate": 9.582028184286423e-08,
|
466 |
+
"logits/chosen": -2.350487470626831,
|
467 |
+
"logits/rejected": -2.307096481323242,
|
468 |
+
"logps/chosen": -554.42529296875,
|
469 |
+
"logps/rejected": -470.14434814453125,
|
470 |
+
"loss": 0.0402,
|
471 |
+
"rewards/accuracies": 0.8062499761581421,
|
472 |
+
"rewards/chosen": -1.1662867069244385,
|
473 |
+
"rewards/margins": 2.156501531600952,
|
474 |
+
"rewards/rejected": -3.3227882385253906,
|
475 |
"step": 300
|
476 |
},
|
477 |
{
|
478 |
"epoch": 0.74,
|
479 |
+
"eval_logits/chosen": -2.2979543209075928,
|
480 |
+
"eval_logits/rejected": -2.2751243114471436,
|
481 |
+
"eval_logps/chosen": -467.0057067871094,
|
482 |
+
"eval_logps/rejected": -507.4114685058594,
|
483 |
+
"eval_loss": 0.08367828279733658,
|
484 |
+
"eval_rewards/accuracies": 0.62890625,
|
485 |
+
"eval_rewards/chosen": -2.099660634994507,
|
486 |
+
"eval_rewards/margins": 0.4009218215942383,
|
487 |
+
"eval_rewards/rejected": -2.500582218170166,
|
488 |
+
"eval_runtime": 53.3734,
|
489 |
+
"eval_samples_per_second": 37.472,
|
490 |
"eval_steps_per_second": 0.6,
|
491 |
"step": 300
|
492 |
},
|
493 |
{
|
494 |
"epoch": 0.76,
|
495 |
"learning_rate": 7.943028774907065e-08,
|
496 |
+
"logits/chosen": -2.316253185272217,
|
497 |
+
"logits/rejected": -2.209606170654297,
|
498 |
+
"logps/chosen": -524.6145629882812,
|
499 |
+
"logps/rejected": -420.94671630859375,
|
500 |
+
"loss": 0.0324,
|
501 |
+
"rewards/accuracies": 0.768750011920929,
|
502 |
+
"rewards/chosen": -1.0568145513534546,
|
503 |
+
"rewards/margins": 2.0644707679748535,
|
504 |
+
"rewards/rejected": -3.1212852001190186,
|
505 |
"step": 310
|
506 |
},
|
507 |
{
|
508 |
"epoch": 0.79,
|
509 |
"learning_rate": 6.431007601814637e-08,
|
510 |
+
"logits/chosen": -2.2733869552612305,
|
511 |
+
"logits/rejected": -2.169506549835205,
|
512 |
+
"logps/chosen": -532.5906982421875,
|
513 |
+
"logps/rejected": -450.932373046875,
|
514 |
+
"loss": 0.0316,
|
515 |
+
"rewards/accuracies": 0.762499988079071,
|
516 |
+
"rewards/chosen": -1.5221502780914307,
|
517 |
+
"rewards/margins": 1.9199508428573608,
|
518 |
+
"rewards/rejected": -3.442101001739502,
|
519 |
"step": 320
|
520 |
},
|
521 |
{
|
522 |
"epoch": 0.81,
|
523 |
"learning_rate": 5.0572206951246e-08,
|
524 |
+
"logits/chosen": -2.290539503097534,
|
525 |
+
"logits/rejected": -2.193920850753784,
|
526 |
+
"logps/chosen": -562.043701171875,
|
527 |
+
"logps/rejected": -479.5208435058594,
|
528 |
+
"loss": 0.0278,
|
529 |
+
"rewards/accuracies": 0.793749988079071,
|
530 |
+
"rewards/chosen": -1.3767458200454712,
|
531 |
+
"rewards/margins": 2.1372973918914795,
|
532 |
+
"rewards/rejected": -3.514043092727661,
|
533 |
"step": 330
|
534 |
},
|
535 |
{
|
536 |
"epoch": 0.84,
|
537 |
"learning_rate": 3.831895019292897e-08,
|
538 |
+
"logits/chosen": -2.3263535499572754,
|
539 |
+
"logits/rejected": -2.207899570465088,
|
540 |
+
"logps/chosen": -619.2625122070312,
|
541 |
+
"logps/rejected": -520.6148071289062,
|
542 |
+
"loss": 0.0305,
|
543 |
+
"rewards/accuracies": 0.7562500238418579,
|
544 |
+
"rewards/chosen": -1.5666420459747314,
|
545 |
+
"rewards/margins": 2.300938844680786,
|
546 |
+
"rewards/rejected": -3.8675804138183594,
|
547 |
"step": 340
|
548 |
},
|
549 |
{
|
550 |
"epoch": 0.86,
|
551 |
"learning_rate": 2.764152339909756e-08,
|
552 |
+
"logits/chosen": -2.305875539779663,
|
553 |
+
"logits/rejected": -2.1924188137054443,
|
554 |
+
"logps/chosen": -568.1319580078125,
|
555 |
+
"logps/rejected": -475.6539611816406,
|
556 |
+
"loss": 0.0245,
|
557 |
+
"rewards/accuracies": 0.8062499761581421,
|
558 |
+
"rewards/chosen": -1.341552972793579,
|
559 |
+
"rewards/margins": 2.3198726177215576,
|
560 |
+
"rewards/rejected": -3.661425828933716,
|
561 |
"step": 350
|
562 |
},
|
563 |
{
|
564 |
"epoch": 0.89,
|
565 |
"learning_rate": 1.861941317991664e-08,
|
566 |
+
"logits/chosen": -2.31453800201416,
|
567 |
+
"logits/rejected": -2.209552049636841,
|
568 |
+
"logps/chosen": -574.0198974609375,
|
569 |
+
"logps/rejected": -498.809326171875,
|
570 |
+
"loss": 0.0246,
|
571 |
+
"rewards/accuracies": 0.8187500238418579,
|
572 |
+
"rewards/chosen": -1.3858083486557007,
|
573 |
+
"rewards/margins": 2.422987699508667,
|
574 |
+
"rewards/rejected": -3.80879545211792,
|
575 |
"step": 360
|
576 |
},
|
577 |
{
|
578 |
"epoch": 0.91,
|
579 |
"learning_rate": 1.13197833728636e-08,
|
580 |
+
"logits/chosen": -2.2876980304718018,
|
581 |
+
"logits/rejected": -2.1881823539733887,
|
582 |
+
"logps/chosen": -583.4609985351562,
|
583 |
+
"logps/rejected": -515.4216918945312,
|
584 |
+
"loss": 0.0274,
|
585 |
+
"rewards/accuracies": 0.8125,
|
586 |
+
"rewards/chosen": -1.2713569402694702,
|
587 |
+
"rewards/margins": 2.548645496368408,
|
588 |
+
"rewards/rejected": -3.820002317428589,
|
589 |
"step": 370
|
590 |
},
|
591 |
{
|
592 |
"epoch": 0.94,
|
593 |
"learning_rate": 5.79697505093521e-09,
|
594 |
+
"logits/chosen": -2.2938995361328125,
|
595 |
+
"logits/rejected": -2.161371946334839,
|
596 |
+
"logps/chosen": -567.2229614257812,
|
597 |
+
"logps/rejected": -493.6429138183594,
|
598 |
+
"loss": 0.0339,
|
599 |
+
"rewards/accuracies": 0.8187500238418579,
|
600 |
+
"rewards/chosen": -1.495012879371643,
|
601 |
+
"rewards/margins": 2.173337697982788,
|
602 |
+
"rewards/rejected": -3.6683506965637207,
|
603 |
"step": 380
|
604 |
},
|
605 |
{
|
606 |
"epoch": 0.96,
|
607 |
"learning_rate": 2.092101988131256e-09,
|
608 |
+
"logits/chosen": -2.3137059211730957,
|
609 |
+
"logits/rejected": -2.1986048221588135,
|
610 |
+
"logps/chosen": -600.1227416992188,
|
611 |
+
"logps/rejected": -496.6559143066406,
|
612 |
+
"loss": 0.0258,
|
613 |
+
"rewards/accuracies": 0.7875000238418579,
|
614 |
+
"rewards/chosen": -1.368238925933838,
|
615 |
+
"rewards/margins": 2.388805389404297,
|
616 |
+
"rewards/rejected": -3.7570443153381348,
|
617 |
"step": 390
|
618 |
},
|
619 |
{
|
620 |
"epoch": 0.99,
|
621 |
"learning_rate": 2.327445937151673e-10,
|
622 |
+
"logits/chosen": -2.316849708557129,
|
623 |
+
"logits/rejected": -2.1959729194641113,
|
624 |
+
"logps/chosen": -559.0263061523438,
|
625 |
+
"logps/rejected": -482.11773681640625,
|
626 |
+
"loss": 0.0288,
|
627 |
+
"rewards/accuracies": 0.78125,
|
628 |
+
"rewards/chosen": -1.659148931503296,
|
629 |
+
"rewards/margins": 2.0086750984191895,
|
630 |
+
"rewards/rejected": -3.6678237915039062,
|
631 |
"step": 400
|
632 |
},
|
633 |
{
|
634 |
"epoch": 0.99,
|
635 |
+
"eval_logits/chosen": -2.2708253860473633,
|
636 |
+
"eval_logits/rejected": -2.245922565460205,
|
637 |
+
"eval_logps/chosen": -513.0960083007812,
|
638 |
+
"eval_logps/rejected": -552.8470458984375,
|
639 |
+
"eval_loss": 0.06599809229373932,
|
640 |
"eval_rewards/accuracies": 0.625,
|
641 |
+
"eval_rewards/chosen": -2.560563564300537,
|
642 |
+
"eval_rewards/margins": 0.3943747282028198,
|
643 |
+
"eval_rewards/rejected": -2.9549384117126465,
|
644 |
+
"eval_runtime": 53.3482,
|
645 |
+
"eval_samples_per_second": 37.49,
|
646 |
+
"eval_steps_per_second": 0.6,
|
647 |
"step": 400
|
648 |
},
|
649 |
{
|
650 |
"epoch": 1.0,
|
651 |
"step": 405,
|
652 |
"total_flos": 0.0,
|
653 |
+
"train_loss": 0.07151281171374851,
|
654 |
+
"train_runtime": 3738.25,
|
655 |
+
"train_samples_per_second": 13.882,
|
656 |
"train_steps_per_second": 0.108
|
657 |
}
|
658 |
],
|