Model save
Browse files- README.md +3 -3
- all_results.json +5 -6
- generation_config.json +1 -1
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- runs/Jun30_16-43-35_n136-129-074/events.out.tfevents.1719737035.n136-129-074.1271954.0 +2 -2
- train_results.json +5 -6
- trainer_state.json +496 -508
- training_args.bin +1 -1
README.md
CHANGED
@@ -32,7 +32,7 @@ More information needed
|
|
32 |
### Training hyperparameters
|
33 |
|
34 |
The following hyperparameters were used during training:
|
35 |
-
- learning_rate:
|
36 |
- train_batch_size: 4
|
37 |
- eval_batch_size: 4
|
38 |
- seed: 42
|
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
|
|
52 |
|
53 |
### Framework versions
|
54 |
|
55 |
-
- Transformers 4.
|
56 |
- Pytorch 2.1.2+cu118
|
57 |
- Datasets 2.16.1
|
58 |
-
- Tokenizers 0.
|
|
|
32 |
### Training hyperparameters
|
33 |
|
34 |
The following hyperparameters were used during training:
|
35 |
+
- learning_rate: 1e-07
|
36 |
- train_batch_size: 4
|
37 |
- eval_batch_size: 4
|
38 |
- seed: 42
|
|
|
52 |
|
53 |
### Framework versions
|
54 |
|
55 |
+
- Transformers 4.39.3
|
56 |
- Pytorch 2.1.2+cu118
|
57 |
- Datasets 2.16.1
|
58 |
+
- Tokenizers 0.15.2
|
all_results.json
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch": 0
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"train_runtime": 7020.7263,
|
6 |
"train_samples": 52922,
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"train_loss": 0.5277958785073232,
|
4 |
+
"train_runtime": 6372.8883,
|
|
|
5 |
"train_samples": 52922,
|
6 |
+
"train_samples_per_second": 8.304,
|
7 |
+
"train_steps_per_second": 0.065
|
8 |
}
|
generation_config.json
CHANGED
@@ -2,5 +2,5 @@
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 2,
|
5 |
-
"transformers_version": "4.
|
6 |
}
|
|
|
2 |
"_from_model_config": true,
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 2,
|
5 |
+
"transformers_version": "4.39.3"
|
6 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943162336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba15de61ecf1fab38692965bea81fb4fbc15bdea9730cfaafee7a941062fd99a
|
3 |
size 4943162336
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a86b9d593b597cc2ecc5f43476a07ebe0e278ba57f94d4f7643b216560e89d16
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540516344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9c8ca0b8509b351f3013c55b40a6f892b4fec7ad730321d44ee3a7d0c8d4bf4
|
3 |
size 4540516344
|
runs/Jun30_16-43-35_n136-129-074/events.out.tfevents.1719737035.n136-129-074.1271954.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23af12085174b96f7419c292bede08c913f4deed18456f1554b5e4498ef1e928
|
3 |
+
size 33985
|
train_results.json
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch": 0
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"train_runtime": 7020.7263,
|
6 |
"train_samples": 52922,
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"train_loss": 0.5277958785073232,
|
4 |
+
"train_runtime": 6372.8883,
|
|
|
5 |
"train_samples": 52922,
|
6 |
+
"train_samples_per_second": 8.304,
|
7 |
+
"train_steps_per_second": 0.065
|
8 |
}
|
trainer_state.json
CHANGED
@@ -9,9 +9,9 @@
|
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"grad_norm": 1308.
|
14 |
-
"learning_rate":
|
15 |
"logits/chosen": -2.7005977630615234,
|
16 |
"logits/rejected": -2.6288318634033203,
|
17 |
"logps/chosen": -1.1158788204193115,
|
@@ -24,628 +24,628 @@
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
-
"epoch": 0.
|
28 |
-
"grad_norm":
|
29 |
-
"learning_rate":
|
30 |
-
"logits/chosen": -2.
|
31 |
-
"logits/rejected": -2.
|
32 |
-
"logps/chosen": -0.
|
33 |
-
"logps/rejected": -0.
|
34 |
-
"loss": 0.
|
35 |
-
"rewards/accuracies": 0.
|
36 |
-
"rewards/chosen": 0.
|
37 |
-
"rewards/margins": 0.
|
38 |
-
"rewards/rejected": -0.
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
-
"epoch": 0.
|
43 |
-
"grad_norm":
|
44 |
-
"learning_rate":
|
45 |
-
"logits/chosen": -2.
|
46 |
-
"logits/rejected": -2.
|
47 |
-
"logps/chosen": -0.
|
48 |
-
"logps/rejected": -1.
|
49 |
-
"loss": 0.
|
50 |
-
"rewards/accuracies": 0.
|
51 |
-
"rewards/chosen": -0.
|
52 |
-
"rewards/margins": 0.
|
53 |
-
"rewards/rejected": -0.
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
-
"epoch": 0.
|
58 |
-
"grad_norm":
|
59 |
-
"learning_rate":
|
60 |
-
"logits/chosen": -2.
|
61 |
-
"logits/rejected": -2.
|
62 |
-
"logps/chosen": -0.
|
63 |
-
"logps/rejected": -0.
|
64 |
-
"loss": 0.
|
65 |
-
"rewards/accuracies": 0.
|
66 |
-
"rewards/chosen": -0.
|
67 |
-
"rewards/margins": 0.
|
68 |
-
"rewards/rejected": -
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
-
"epoch": 0.
|
73 |
-
"grad_norm":
|
74 |
-
"learning_rate":
|
75 |
-
"logits/chosen": -2.
|
76 |
-
"logits/rejected": -2.
|
77 |
-
"logps/chosen": -0.
|
78 |
-
"logps/rejected": -0.
|
79 |
-
"loss": 0.
|
80 |
-
"rewards/accuracies": 0.
|
81 |
-
"rewards/chosen": -
|
82 |
-
"rewards/margins":
|
83 |
-
"rewards/rejected": -
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
-
"epoch": 0.
|
88 |
-
"grad_norm":
|
89 |
-
"learning_rate":
|
90 |
-
"logits/chosen": -2.
|
91 |
-
"logits/rejected": -2.
|
92 |
-
"logps/chosen": -0.
|
93 |
-
"logps/rejected": -0.
|
94 |
-
"loss": 0.
|
95 |
-
"rewards/accuracies": 0.
|
96 |
-
"rewards/chosen": 0.
|
97 |
-
"rewards/margins":
|
98 |
-
"rewards/rejected": -0.
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
-
"epoch": 0.
|
103 |
-
"grad_norm":
|
104 |
-
"learning_rate":
|
105 |
-
"logits/chosen": -2.
|
106 |
-
"logits/rejected": -2.
|
107 |
-
"logps/chosen": -0.
|
108 |
-
"logps/rejected": -0.
|
109 |
-
"loss":
|
110 |
-
"rewards/accuracies": 0.
|
111 |
-
"rewards/chosen": -
|
112 |
-
"rewards/margins":
|
113 |
-
"rewards/rejected": -
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
-
"epoch": 0.
|
118 |
-
"grad_norm":
|
119 |
-
"learning_rate":
|
120 |
-
"logits/chosen": -2.
|
121 |
-
"logits/rejected": -2.
|
122 |
-
"logps/chosen": -0.
|
123 |
-
"logps/rejected": -0.
|
124 |
-
"loss": 0.
|
125 |
-
"rewards/accuracies": 0.
|
126 |
-
"rewards/chosen":
|
127 |
-
"rewards/margins":
|
128 |
-
"rewards/rejected": -1.
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
-
"epoch": 0.
|
133 |
-
"grad_norm":
|
134 |
-
"learning_rate":
|
135 |
-
"logits/chosen": -2.
|
136 |
-
"logits/rejected": -2.
|
137 |
-
"logps/chosen": -0.
|
138 |
-
"logps/rejected": -
|
139 |
-
"loss":
|
140 |
-
"rewards/accuracies": 0.
|
141 |
-
"rewards/chosen": -0.
|
142 |
-
"rewards/margins":
|
143 |
-
"rewards/rejected": -
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
-
"epoch": 0.
|
148 |
-
"grad_norm":
|
149 |
-
"learning_rate":
|
150 |
-
"logits/chosen": -2.
|
151 |
-
"logits/rejected": -2.
|
152 |
-
"logps/chosen": -1.
|
153 |
-
"logps/rejected": -1.
|
154 |
-
"loss":
|
155 |
-
"rewards/accuracies": 0.
|
156 |
-
"rewards/chosen": -0.
|
157 |
-
"rewards/margins":
|
158 |
-
"rewards/rejected": -
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
-
"epoch": 0.
|
163 |
-
"grad_norm":
|
164 |
-
"learning_rate":
|
165 |
-
"logits/chosen": -2.
|
166 |
-
"logits/rejected": -2.
|
167 |
-
"logps/chosen": -0.
|
168 |
-
"logps/rejected": -0.
|
169 |
-
"loss":
|
170 |
"rewards/accuracies": 0.8062499761581421,
|
171 |
-
"rewards/chosen": -0.
|
172 |
-
"rewards/margins":
|
173 |
-
"rewards/rejected": -
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
-
"epoch": 0.
|
178 |
-
"grad_norm":
|
179 |
-
"learning_rate":
|
180 |
-
"logits/chosen": -2.
|
181 |
-
"logits/rejected": -2.
|
182 |
-
"logps/chosen": -0.
|
183 |
-
"logps/rejected": -0.
|
184 |
-
"loss": 0.
|
185 |
-
"rewards/accuracies": 0.
|
186 |
-
"rewards/chosen": -0.
|
187 |
-
"rewards/margins":
|
188 |
-
"rewards/rejected": -
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
-
"epoch": 0.
|
193 |
-
"grad_norm":
|
194 |
-
"learning_rate":
|
195 |
-
"logits/chosen": -2.
|
196 |
-
"logits/rejected": -2.
|
197 |
-
"logps/chosen": -0.
|
198 |
-
"logps/rejected": -
|
199 |
-
"loss": 0.
|
200 |
-
"rewards/accuracies": 0.
|
201 |
-
"rewards/chosen": -
|
202 |
-
"rewards/margins":
|
203 |
-
"rewards/rejected": -
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
-
"epoch": 0.
|
208 |
-
"grad_norm":
|
209 |
-
"learning_rate":
|
210 |
-
"logits/chosen": -2.
|
211 |
-
"logits/rejected": -2.
|
212 |
-
"logps/chosen": -0.
|
213 |
-
"logps/rejected": -0.
|
214 |
-
"loss":
|
215 |
-
"rewards/accuracies": 0.
|
216 |
-
"rewards/chosen": 0.
|
217 |
-
"rewards/margins":
|
218 |
-
"rewards/rejected": -
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
-
"epoch": 0.
|
223 |
-
"grad_norm":
|
224 |
-
"learning_rate":
|
225 |
-
"logits/chosen": -2.
|
226 |
-
"logits/rejected": -2.
|
227 |
-
"logps/chosen": -0.
|
228 |
-
"logps/rejected": -0.
|
229 |
-
"loss": 0.
|
230 |
-
"rewards/accuracies": 0.
|
231 |
-
"rewards/chosen": -
|
232 |
-
"rewards/margins":
|
233 |
-
"rewards/rejected": -
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
-
"epoch": 0.
|
238 |
-
"grad_norm":
|
239 |
-
"learning_rate":
|
240 |
-
"logits/chosen": -2.
|
241 |
-
"logits/rejected": -2.
|
242 |
-
"logps/chosen": -0.
|
243 |
-
"logps/rejected": -
|
244 |
-
"loss":
|
245 |
-
"rewards/accuracies": 0.
|
246 |
-
"rewards/chosen": 0.
|
247 |
-
"rewards/margins":
|
248 |
-
"rewards/rejected": -
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
-
"epoch": 0.
|
253 |
-
"grad_norm":
|
254 |
-
"learning_rate":
|
255 |
-
"logits/chosen": -2.
|
256 |
-
"logits/rejected": -2.
|
257 |
-
"logps/chosen": -0.
|
258 |
-
"logps/rejected": -0.
|
259 |
-
"loss":
|
260 |
"rewards/accuracies": 0.824999988079071,
|
261 |
-
"rewards/chosen": -0.
|
262 |
-
"rewards/margins":
|
263 |
-
"rewards/rejected": -
|
264 |
"step": 160
|
265 |
},
|
266 |
{
|
267 |
-
"epoch": 0.
|
268 |
-
"grad_norm":
|
269 |
-
"learning_rate":
|
270 |
-
"logits/chosen": -2.
|
271 |
-
"logits/rejected": -2.
|
272 |
-
"logps/chosen": -0.
|
273 |
-
"logps/rejected": -0.
|
274 |
-
"loss":
|
275 |
-
"rewards/accuracies": 0.
|
276 |
-
"rewards/chosen": -
|
277 |
-
"rewards/margins":
|
278 |
-
"rewards/rejected": -
|
279 |
"step": 170
|
280 |
},
|
281 |
{
|
282 |
-
"epoch": 0.
|
283 |
-
"grad_norm":
|
284 |
-
"learning_rate":
|
285 |
-
"logits/chosen": -2.
|
286 |
-
"logits/rejected": -2.
|
287 |
-
"logps/chosen": -0.
|
288 |
-
"logps/rejected": -0.
|
289 |
-
"loss":
|
290 |
-
"rewards/accuracies": 0.
|
291 |
-
"rewards/chosen": 0.
|
292 |
-
"rewards/margins":
|
293 |
-
"rewards/rejected": -
|
294 |
"step": 180
|
295 |
},
|
296 |
{
|
297 |
-
"epoch": 0.
|
298 |
-
"grad_norm":
|
299 |
-
"learning_rate":
|
300 |
-
"logits/chosen": -2.
|
301 |
-
"logits/rejected": -2.
|
302 |
-
"logps/chosen": -0.
|
303 |
-
"logps/rejected": -1.
|
304 |
-
"loss":
|
305 |
-
"rewards/accuracies": 0.
|
306 |
-
"rewards/chosen": -
|
307 |
-
"rewards/margins":
|
308 |
-
"rewards/rejected": -
|
309 |
"step": 190
|
310 |
},
|
311 |
{
|
312 |
-
"epoch": 0.
|
313 |
-
"grad_norm":
|
314 |
-
"learning_rate":
|
315 |
-
"logits/chosen": -2.
|
316 |
-
"logits/rejected": -2.
|
317 |
-
"logps/chosen": -0.
|
318 |
-
"logps/rejected": -0.
|
319 |
-
"loss":
|
320 |
-
"rewards/accuracies": 0.
|
321 |
-
"rewards/chosen": 0.
|
322 |
-
"rewards/margins":
|
323 |
-
"rewards/rejected": -
|
324 |
"step": 200
|
325 |
},
|
326 |
{
|
327 |
-
"epoch": 0.
|
328 |
-
"grad_norm":
|
329 |
-
"learning_rate":
|
330 |
-
"logits/chosen": -2.
|
331 |
-
"logits/rejected": -2.
|
332 |
-
"logps/chosen": -1.
|
333 |
-
"logps/rejected": -0.
|
334 |
-
"loss":
|
335 |
-
"rewards/accuracies": 0.
|
336 |
-
"rewards/chosen": 0.
|
337 |
-
"rewards/margins":
|
338 |
-
"rewards/rejected": -
|
339 |
"step": 210
|
340 |
},
|
341 |
{
|
342 |
-
"epoch": 0.
|
343 |
-
"grad_norm":
|
344 |
-
"learning_rate":
|
345 |
-
"logits/chosen": -2.
|
346 |
-
"logits/rejected": -2.
|
347 |
-
"logps/chosen": -0.
|
348 |
-
"logps/rejected": -0.
|
349 |
-
"loss":
|
350 |
-
"rewards/accuracies": 0.
|
351 |
-
"rewards/chosen": 0.
|
352 |
-
"rewards/margins":
|
353 |
-
"rewards/rejected": -
|
354 |
"step": 220
|
355 |
},
|
356 |
{
|
357 |
-
"epoch": 0.
|
358 |
-
"grad_norm":
|
359 |
-
"learning_rate":
|
360 |
-
"logits/chosen": -2.
|
361 |
-
"logits/rejected": -2.
|
362 |
-
"logps/chosen": -0.
|
363 |
-
"logps/rejected": -0.
|
364 |
-
"loss": 0.
|
365 |
"rewards/accuracies": 0.75,
|
366 |
-
"rewards/chosen": -
|
367 |
-
"rewards/margins":
|
368 |
-
"rewards/rejected": -
|
369 |
"step": 230
|
370 |
},
|
371 |
{
|
372 |
-
"epoch": 0.
|
373 |
-
"grad_norm":
|
374 |
-
"learning_rate":
|
375 |
-
"logits/chosen": -2.
|
376 |
-
"logits/rejected": -2.
|
377 |
-
"logps/chosen": -0.
|
378 |
-
"logps/rejected": -
|
379 |
-
"loss":
|
380 |
-
"rewards/accuracies": 0.
|
381 |
-
"rewards/chosen":
|
382 |
-
"rewards/margins":
|
383 |
-
"rewards/rejected": -
|
384 |
"step": 240
|
385 |
},
|
386 |
{
|
387 |
-
"epoch": 0.
|
388 |
-
"grad_norm":
|
389 |
-
"learning_rate":
|
390 |
-
"logits/chosen": -2.
|
391 |
-
"logits/rejected": -2.
|
392 |
-
"logps/chosen": -0.
|
393 |
-
"logps/rejected": -0.
|
394 |
-
"loss":
|
395 |
-
"rewards/accuracies": 0.
|
396 |
-
"rewards/chosen":
|
397 |
-
"rewards/margins":
|
398 |
-
"rewards/rejected": -
|
399 |
"step": 250
|
400 |
},
|
401 |
{
|
402 |
-
"epoch": 0.
|
403 |
-
"grad_norm":
|
404 |
-
"learning_rate":
|
405 |
-
"logits/chosen": -2.
|
406 |
-
"logits/rejected": -2.
|
407 |
-
"logps/chosen": -1.
|
408 |
-
"logps/rejected": -
|
409 |
-
"loss":
|
410 |
-
"rewards/accuracies": 0.
|
411 |
-
"rewards/chosen": 0.
|
412 |
-
"rewards/margins":
|
413 |
-
"rewards/rejected": -
|
414 |
"step": 260
|
415 |
},
|
416 |
{
|
417 |
-
"epoch": 0.
|
418 |
-
"grad_norm":
|
419 |
-
"learning_rate":
|
420 |
-
"logits/chosen": -2.
|
421 |
-
"logits/rejected": -2.
|
422 |
-
"logps/chosen": -0.
|
423 |
-
"logps/rejected": -0.
|
424 |
-
"loss":
|
425 |
"rewards/accuracies": 0.862500011920929,
|
426 |
-
"rewards/chosen": -
|
427 |
-
"rewards/margins":
|
428 |
-
"rewards/rejected": -
|
429 |
"step": 270
|
430 |
},
|
431 |
{
|
432 |
-
"epoch": 0.
|
433 |
-
"grad_norm":
|
434 |
-
"learning_rate":
|
435 |
-
"logits/chosen": -2.
|
436 |
-
"logits/rejected": -2.
|
437 |
-
"logps/chosen": -0.
|
438 |
-
"logps/rejected": -0.
|
439 |
-
"loss": 0.
|
440 |
-
"rewards/accuracies": 0.
|
441 |
-
"rewards/chosen": 0.
|
442 |
-
"rewards/margins":
|
443 |
-
"rewards/rejected": -1.
|
444 |
"step": 280
|
445 |
},
|
446 |
{
|
447 |
-
"epoch": 0.
|
448 |
-
"grad_norm":
|
449 |
-
"learning_rate":
|
450 |
-
"logits/chosen": -2.
|
451 |
-
"logits/rejected": -2.
|
452 |
-
"logps/chosen": -
|
453 |
-
"logps/rejected": -0.
|
454 |
-
"loss": 0.
|
455 |
-
"rewards/accuracies": 0.
|
456 |
-
"rewards/chosen":
|
457 |
-
"rewards/margins":
|
458 |
-
"rewards/rejected": -
|
459 |
"step": 290
|
460 |
},
|
461 |
{
|
462 |
-
"epoch": 0.
|
463 |
-
"grad_norm":
|
464 |
-
"learning_rate":
|
465 |
-
"logits/chosen": -2.
|
466 |
-
"logits/rejected": -2.
|
467 |
-
"logps/chosen": -0.
|
468 |
-
"logps/rejected": -0.
|
469 |
-
"loss": 0.
|
470 |
-
"rewards/accuracies": 0.
|
471 |
-
"rewards/chosen": -0.
|
472 |
-
"rewards/margins":
|
473 |
-
"rewards/rejected": -
|
474 |
"step": 300
|
475 |
},
|
476 |
{
|
477 |
-
"epoch": 0.
|
478 |
-
"grad_norm":
|
479 |
-
"learning_rate":
|
480 |
-
"logits/chosen": -2.
|
481 |
-
"logits/rejected": -2.
|
482 |
-
"logps/chosen": -1.
|
483 |
-
"logps/rejected": -1.
|
484 |
-
"loss": 0.
|
485 |
-
"rewards/accuracies": 0.
|
486 |
-
"rewards/chosen": -
|
487 |
-
"rewards/margins":
|
488 |
-
"rewards/rejected": -
|
489 |
"step": 310
|
490 |
},
|
491 |
{
|
492 |
-
"epoch": 0.
|
493 |
-
"grad_norm":
|
494 |
-
"learning_rate":
|
495 |
-
"logits/chosen": -2.
|
496 |
-
"logits/rejected": -2.
|
497 |
-
"logps/chosen": -0.
|
498 |
-
"logps/rejected": -0.
|
499 |
-
"loss": 0.
|
500 |
-
"rewards/accuracies": 0.
|
501 |
-
"rewards/chosen": 0.
|
502 |
-
"rewards/margins":
|
503 |
-
"rewards/rejected": -
|
504 |
"step": 320
|
505 |
},
|
506 |
{
|
507 |
-
"epoch": 0.
|
508 |
-
"grad_norm":
|
509 |
-
"learning_rate":
|
510 |
-
"logits/chosen": -2.
|
511 |
-
"logits/rejected": -2.
|
512 |
-
"logps/chosen": -0.
|
513 |
-
"logps/rejected": -0.
|
514 |
-
"loss": 0.
|
515 |
-
"rewards/accuracies": 0.
|
516 |
-
"rewards/chosen": -0.
|
517 |
-
"rewards/margins":
|
518 |
-
"rewards/rejected": -
|
519 |
"step": 330
|
520 |
},
|
521 |
{
|
522 |
-
"epoch": 0.
|
523 |
-
"grad_norm":
|
524 |
-
"learning_rate":
|
525 |
-
"logits/chosen": -2.
|
526 |
-
"logits/rejected": -2.
|
527 |
-
"logps/chosen": -0.
|
528 |
-
"logps/rejected": -0.
|
529 |
-
"loss": 0.
|
530 |
-
"rewards/accuracies": 0.
|
531 |
-
"rewards/chosen": -
|
532 |
-
"rewards/margins":
|
533 |
-
"rewards/rejected": -
|
534 |
"step": 340
|
535 |
},
|
536 |
{
|
537 |
-
"epoch": 0.
|
538 |
-
"grad_norm":
|
539 |
-
"learning_rate":
|
540 |
-
"logits/chosen": -2.
|
541 |
-
"logits/rejected": -2.
|
542 |
-
"logps/chosen": -0.
|
543 |
-
"logps/rejected": -0.
|
544 |
-
"loss": 0.
|
545 |
-
"rewards/accuracies": 0.
|
546 |
-
"rewards/chosen": -
|
547 |
-
"rewards/margins":
|
548 |
-
"rewards/rejected": -
|
549 |
"step": 350
|
550 |
},
|
551 |
{
|
552 |
-
"epoch": 0.
|
553 |
-
"grad_norm":
|
554 |
-
"learning_rate":
|
555 |
-
"logits/chosen": -2.
|
556 |
-
"logits/rejected": -2.
|
557 |
-
"logps/chosen": -0.
|
558 |
-
"logps/rejected": -1.
|
559 |
-
"loss": 0.
|
560 |
-
"rewards/accuracies": 0.
|
561 |
-
"rewards/chosen": -0.
|
562 |
-
"rewards/margins":
|
563 |
-
"rewards/rejected": -
|
564 |
"step": 360
|
565 |
},
|
566 |
{
|
567 |
-
"epoch": 0.
|
568 |
-
"grad_norm":
|
569 |
-
"learning_rate":
|
570 |
-
"logits/chosen": -2.
|
571 |
-
"logits/rejected": -2.
|
572 |
-
"logps/chosen": -0.
|
573 |
-
"logps/rejected": -0.
|
574 |
-
"loss": 0.
|
575 |
-
"rewards/accuracies": 0.
|
576 |
-
"rewards/chosen":
|
577 |
-
"rewards/margins":
|
578 |
-
"rewards/rejected": -
|
579 |
"step": 370
|
580 |
},
|
581 |
{
|
582 |
-
"epoch": 0.
|
583 |
-
"grad_norm":
|
584 |
-
"learning_rate":
|
585 |
-
"logits/chosen": -2.
|
586 |
-
"logits/rejected": -2.
|
587 |
-
"logps/chosen": -0.
|
588 |
-
"logps/rejected": -0.
|
589 |
-
"loss": 0.
|
590 |
-
"rewards/accuracies": 0.
|
591 |
-
"rewards/chosen": -0.
|
592 |
-
"rewards/margins":
|
593 |
-
"rewards/rejected": -
|
594 |
"step": 380
|
595 |
},
|
596 |
{
|
597 |
-
"epoch": 0.
|
598 |
-
"grad_norm":
|
599 |
-
"learning_rate":
|
600 |
-
"logits/chosen": -2.
|
601 |
-
"logits/rejected": -2.
|
602 |
-
"logps/chosen": -0.
|
603 |
-
"logps/rejected": -
|
604 |
-
"loss": 0.
|
605 |
-
"rewards/accuracies": 0.
|
606 |
-
"rewards/chosen": 0.
|
607 |
-
"rewards/margins":
|
608 |
-
"rewards/rejected": -
|
609 |
"step": 390
|
610 |
},
|
611 |
{
|
612 |
-
"epoch": 0.
|
613 |
-
"grad_norm":
|
614 |
-
"learning_rate":
|
615 |
-
"logits/chosen": -2.
|
616 |
-
"logits/rejected": -2.
|
617 |
-
"logps/chosen": -0.
|
618 |
-
"logps/rejected": -0.
|
619 |
-
"loss": 0.
|
620 |
-
"rewards/accuracies": 0.
|
621 |
-
"rewards/chosen": -0.
|
622 |
-
"rewards/margins":
|
623 |
-
"rewards/rejected": -
|
624 |
"step": 400
|
625 |
},
|
626 |
{
|
627 |
-
"epoch": 0.
|
628 |
-
"grad_norm":
|
629 |
-
"learning_rate":
|
630 |
-
"logits/chosen": -2.
|
631 |
-
"logits/rejected": -2.
|
632 |
-
"logps/chosen": -0.
|
633 |
-
"logps/rejected": -0.
|
634 |
-
"loss": 0.
|
635 |
-
"rewards/accuracies": 0.
|
636 |
-
"rewards/chosen": -0.
|
637 |
-
"rewards/margins":
|
638 |
-
"rewards/rejected": -
|
639 |
"step": 410
|
640 |
},
|
641 |
{
|
642 |
-
"epoch": 0
|
643 |
"step": 413,
|
644 |
"total_flos": 0.0,
|
645 |
-
"train_loss": 0.
|
646 |
-
"train_runtime":
|
647 |
-
"train_samples_per_second":
|
648 |
-
"train_steps_per_second": 0.
|
649 |
}
|
650 |
],
|
651 |
"logging_steps": 10,
|
@@ -653,18 +653,6 @@
|
|
653 |
"num_input_tokens_seen": 0,
|
654 |
"num_train_epochs": 1,
|
655 |
"save_steps": 100,
|
656 |
-
"stateful_callbacks": {
|
657 |
-
"TrainerControl": {
|
658 |
-
"args": {
|
659 |
-
"should_epoch_stop": false,
|
660 |
-
"should_evaluate": false,
|
661 |
-
"should_log": false,
|
662 |
-
"should_save": true,
|
663 |
-
"should_training_stop": false
|
664 |
-
},
|
665 |
-
"attributes": {}
|
666 |
-
}
|
667 |
-
},
|
668 |
"total_flos": 0.0,
|
669 |
"train_batch_size": 4,
|
670 |
"trial_name": null,
|
|
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.0,
|
13 |
+
"grad_norm": 1308.2122296641476,
|
14 |
+
"learning_rate": 2.3809523809523806e-09,
|
15 |
"logits/chosen": -2.7005977630615234,
|
16 |
"logits/rejected": -2.6288318634033203,
|
17 |
"logps/chosen": -1.1158788204193115,
|
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
+
"epoch": 0.02,
|
28 |
+
"grad_norm": 1049.0503356036236,
|
29 |
+
"learning_rate": 2.3809523809523807e-08,
|
30 |
+
"logits/chosen": -2.762399435043335,
|
31 |
+
"logits/rejected": -2.6968984603881836,
|
32 |
+
"logps/chosen": -0.8372963070869446,
|
33 |
+
"logps/rejected": -0.8176102638244629,
|
34 |
+
"loss": 0.7125,
|
35 |
+
"rewards/accuracies": 0.4236111044883728,
|
36 |
+
"rewards/chosen": 0.03224152699112892,
|
37 |
+
"rewards/margins": 0.04410284012556076,
|
38 |
+
"rewards/rejected": -0.011861314065754414,
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
+
"epoch": 0.05,
|
43 |
+
"grad_norm": 1236.016019303981,
|
44 |
+
"learning_rate": 4.7619047619047613e-08,
|
45 |
+
"logits/chosen": -2.689128875732422,
|
46 |
+
"logits/rejected": -2.64937686920166,
|
47 |
+
"logps/chosen": -0.9927361607551575,
|
48 |
+
"logps/rejected": -1.03745436668396,
|
49 |
+
"loss": 0.7127,
|
50 |
+
"rewards/accuracies": 0.5062500238418579,
|
51 |
+
"rewards/chosen": -0.09930654615163803,
|
52 |
+
"rewards/margins": 0.010919012129306793,
|
53 |
+
"rewards/rejected": -0.11022555828094482,
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
+
"epoch": 0.07,
|
58 |
+
"grad_norm": 1468.1565781012905,
|
59 |
+
"learning_rate": 7.142857142857142e-08,
|
60 |
+
"logits/chosen": -2.7302658557891846,
|
61 |
+
"logits/rejected": -2.6768224239349365,
|
62 |
+
"logps/chosen": -0.967939555644989,
|
63 |
+
"logps/rejected": -0.9760215878486633,
|
64 |
+
"loss": 0.6942,
|
65 |
+
"rewards/accuracies": 0.59375,
|
66 |
+
"rewards/chosen": -0.1426212042570114,
|
67 |
+
"rewards/margins": 0.09910523146390915,
|
68 |
+
"rewards/rejected": -0.24172644317150116,
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
+
"epoch": 0.1,
|
73 |
+
"grad_norm": 1718.8019373304546,
|
74 |
+
"learning_rate": 9.523809523809523e-08,
|
75 |
+
"logits/chosen": -2.6734580993652344,
|
76 |
+
"logits/rejected": -2.6297881603240967,
|
77 |
+
"logps/chosen": -0.9952117204666138,
|
78 |
+
"logps/rejected": -0.920923113822937,
|
79 |
+
"loss": 0.6688,
|
80 |
+
"rewards/accuracies": 0.675000011920929,
|
81 |
+
"rewards/chosen": -0.6706060171127319,
|
82 |
+
"rewards/margins": 0.4042028486728668,
|
83 |
+
"rewards/rejected": -1.0748088359832764,
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
+
"epoch": 0.12,
|
88 |
+
"grad_norm": 886.1855666160817,
|
89 |
+
"learning_rate": 9.988531521210217e-08,
|
90 |
+
"logits/chosen": -2.739663600921631,
|
91 |
+
"logits/rejected": -2.6859798431396484,
|
92 |
+
"logps/chosen": -0.9715211987495422,
|
93 |
+
"logps/rejected": -0.9160677790641785,
|
94 |
+
"loss": 0.6298,
|
95 |
+
"rewards/accuracies": 0.75,
|
96 |
+
"rewards/chosen": -0.4754611551761627,
|
97 |
+
"rewards/margins": 0.44252967834472656,
|
98 |
+
"rewards/rejected": -0.9179908633232117,
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
+
"epoch": 0.15,
|
103 |
+
"grad_norm": 1659.7392402989283,
|
104 |
+
"learning_rate": 9.94203097871474e-08,
|
105 |
+
"logits/chosen": -2.743115186691284,
|
106 |
+
"logits/rejected": -2.676964521408081,
|
107 |
+
"logps/chosen": -0.9205001592636108,
|
108 |
+
"logps/rejected": -0.9147119522094727,
|
109 |
+
"loss": 0.6278,
|
110 |
+
"rewards/accuracies": 0.7749999761581421,
|
111 |
+
"rewards/chosen": -0.4151291251182556,
|
112 |
+
"rewards/margins": 0.4165617823600769,
|
113 |
+
"rewards/rejected": -0.8316909074783325,
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
+
"epoch": 0.17,
|
118 |
+
"grad_norm": 1314.7754621298307,
|
119 |
+
"learning_rate": 9.860114570402053e-08,
|
120 |
+
"logits/chosen": -2.744157314300537,
|
121 |
+
"logits/rejected": -2.7204043865203857,
|
122 |
+
"logps/chosen": -0.9306680560112,
|
123 |
+
"logps/rejected": -0.8743250966072083,
|
124 |
+
"loss": 0.5734,
|
125 |
+
"rewards/accuracies": 0.793749988079071,
|
126 |
+
"rewards/chosen": -0.3868916630744934,
|
127 |
+
"rewards/margins": 0.6596783399581909,
|
128 |
+
"rewards/rejected": -1.046570062637329,
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
+
"epoch": 0.19,
|
133 |
+
"grad_norm": 1220.0578782834486,
|
134 |
+
"learning_rate": 9.743369330335185e-08,
|
135 |
+
"logits/chosen": -2.6871607303619385,
|
136 |
+
"logits/rejected": -2.6331558227539062,
|
137 |
+
"logps/chosen": -0.901233971118927,
|
138 |
+
"logps/rejected": -0.9974308013916016,
|
139 |
+
"loss": 0.5749,
|
140 |
+
"rewards/accuracies": 0.768750011920929,
|
141 |
+
"rewards/chosen": -0.6774497032165527,
|
142 |
+
"rewards/margins": 0.5885565876960754,
|
143 |
+
"rewards/rejected": -1.266006350517273,
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
+
"epoch": 0.22,
|
148 |
+
"grad_norm": 906.8663763416491,
|
149 |
+
"learning_rate": 9.592631884948653e-08,
|
150 |
+
"logits/chosen": -2.6903910636901855,
|
151 |
+
"logits/rejected": -2.6411349773406982,
|
152 |
+
"logps/chosen": -1.0175859928131104,
|
153 |
+
"logps/rejected": -1.0327794551849365,
|
154 |
+
"loss": 0.5569,
|
155 |
+
"rewards/accuracies": 0.71875,
|
156 |
+
"rewards/chosen": -0.40117961168289185,
|
157 |
+
"rewards/margins": 0.8878555297851562,
|
158 |
+
"rewards/rejected": -1.2890350818634033,
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
+
"epoch": 0.24,
|
163 |
+
"grad_norm": 866.6073604256766,
|
164 |
+
"learning_rate": 9.408982457568138e-08,
|
165 |
+
"logits/chosen": -2.7176403999328613,
|
166 |
+
"logits/rejected": -2.671095371246338,
|
167 |
+
"logps/chosen": -0.8981878161430359,
|
168 |
+
"logps/rejected": -0.9157611131668091,
|
169 |
+
"loss": 0.5818,
|
170 |
"rewards/accuracies": 0.8062499761581421,
|
171 |
+
"rewards/chosen": -0.6816717386245728,
|
172 |
+
"rewards/margins": 0.9627196192741394,
|
173 |
+
"rewards/rejected": -1.6443912982940674,
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
+
"epoch": 0.27,
|
178 |
+
"grad_norm": 1160.110812326109,
|
179 |
+
"learning_rate": 9.193737127252132e-08,
|
180 |
+
"logits/chosen": -2.7276268005371094,
|
181 |
+
"logits/rejected": -2.6847987174987793,
|
182 |
+
"logps/chosen": -0.9827289581298828,
|
183 |
+
"logps/rejected": -0.94146728515625,
|
184 |
+
"loss": 0.5535,
|
185 |
+
"rewards/accuracies": 0.762499988079071,
|
186 |
+
"rewards/chosen": -0.4507879614830017,
|
187 |
+
"rewards/margins": 0.8122564554214478,
|
188 |
+
"rewards/rejected": -1.2630443572998047,
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
+
"epoch": 0.29,
|
193 |
+
"grad_norm": 1118.8296262026665,
|
194 |
+
"learning_rate": 8.94843839743072e-08,
|
195 |
+
"logits/chosen": -2.684906244277954,
|
196 |
+
"logits/rejected": -2.6362223625183105,
|
197 |
+
"logps/chosen": -0.8857740163803101,
|
198 |
+
"logps/rejected": -0.9783684611320496,
|
199 |
+
"loss": 0.5057,
|
200 |
+
"rewards/accuracies": 0.831250011920929,
|
201 |
+
"rewards/chosen": -0.4952009320259094,
|
202 |
+
"rewards/margins": 1.4160592555999756,
|
203 |
+
"rewards/rejected": -1.9112603664398193,
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
+
"epoch": 0.31,
|
208 |
+
"grad_norm": 816.4913832702339,
|
209 |
+
"learning_rate": 8.674844141929039e-08,
|
210 |
+
"logits/chosen": -2.7162153720855713,
|
211 |
+
"logits/rejected": -2.680973529815674,
|
212 |
+
"logps/chosen": -0.8731144666671753,
|
213 |
+
"logps/rejected": -0.8701594471931458,
|
214 |
+
"loss": 0.5054,
|
215 |
+
"rewards/accuracies": 0.831250011920929,
|
216 |
+
"rewards/chosen": -0.34728819131851196,
|
217 |
+
"rewards/margins": 1.2909767627716064,
|
218 |
+
"rewards/rejected": -1.6382650136947632,
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
+
"epoch": 0.34,
|
223 |
+
"grad_norm": 1469.7710854903712,
|
224 |
+
"learning_rate": 8.374915007591053e-08,
|
225 |
+
"logits/chosen": -2.691988945007324,
|
226 |
+
"logits/rejected": -2.652631998062134,
|
227 |
+
"logps/chosen": -0.9311792254447937,
|
228 |
+
"logps/rejected": -0.9303830862045288,
|
229 |
+
"loss": 0.534,
|
230 |
+
"rewards/accuracies": 0.7875000238418579,
|
231 |
+
"rewards/chosen": -0.5356858968734741,
|
232 |
+
"rewards/margins": 1.0560630559921265,
|
233 |
+
"rewards/rejected": -1.5917489528656006,
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
+
"epoch": 0.36,
|
238 |
+
"grad_norm": 1688.4287131310427,
|
239 |
+
"learning_rate": 8.05080036377971e-08,
|
240 |
+
"logits/chosen": -2.7248551845550537,
|
241 |
+
"logits/rejected": -2.6649651527404785,
|
242 |
+
"logps/chosen": -0.9584500193595886,
|
243 |
+
"logps/rejected": -0.9874745607376099,
|
244 |
+
"loss": 0.5552,
|
245 |
+
"rewards/accuracies": 0.768750011920929,
|
246 |
+
"rewards/chosen": -0.24187500774860382,
|
247 |
+
"rewards/margins": 1.0612785816192627,
|
248 |
+
"rewards/rejected": -1.303153395652771,
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
+
"epoch": 0.39,
|
253 |
+
"grad_norm": 883.2476388726114,
|
254 |
+
"learning_rate": 7.704822899442949e-08,
|
255 |
+
"logits/chosen": -2.717927932739258,
|
256 |
+
"logits/rejected": -2.6812281608581543,
|
257 |
+
"logps/chosen": -0.9134844541549683,
|
258 |
+
"logps/rejected": -0.9353663325309753,
|
259 |
+
"loss": 0.5424,
|
260 |
"rewards/accuracies": 0.824999988079071,
|
261 |
+
"rewards/chosen": -0.3312217593193054,
|
262 |
+
"rewards/margins": 1.2677682638168335,
|
263 |
+
"rewards/rejected": -1.5989899635314941,
|
264 |
"step": 160
|
265 |
},
|
266 |
{
|
267 |
+
"epoch": 0.41,
|
268 |
+
"grad_norm": 1741.9681865719251,
|
269 |
+
"learning_rate": 7.339461978126947e-08,
|
270 |
+
"logits/chosen": -2.696554660797119,
|
271 |
+
"logits/rejected": -2.628129482269287,
|
272 |
+
"logps/chosen": -0.896826446056366,
|
273 |
+
"logps/rejected": -0.8772991895675659,
|
274 |
+
"loss": 0.5503,
|
275 |
+
"rewards/accuracies": 0.800000011920929,
|
276 |
+
"rewards/chosen": -0.48312321305274963,
|
277 |
+
"rewards/margins": 1.3627839088439941,
|
278 |
+
"rewards/rejected": -1.845907211303711,
|
279 |
"step": 170
|
280 |
},
|
281 |
{
|
282 |
+
"epoch": 0.44,
|
283 |
+
"grad_norm": 1946.5851641629447,
|
284 |
+
"learning_rate": 6.957335870218904e-08,
|
285 |
+
"logits/chosen": -2.673733949661255,
|
286 |
+
"logits/rejected": -2.6041712760925293,
|
287 |
+
"logps/chosen": -0.939963698387146,
|
288 |
+
"logps/rejected": -0.956584095954895,
|
289 |
+
"loss": 0.4957,
|
290 |
+
"rewards/accuracies": 0.8500000238418579,
|
291 |
+
"rewards/chosen": 0.2670658528804779,
|
292 |
+
"rewards/margins": 1.5085264444351196,
|
293 |
+
"rewards/rejected": -1.2414608001708984,
|
294 |
"step": 180
|
295 |
},
|
296 |
{
|
297 |
+
"epoch": 0.46,
|
298 |
+
"grad_norm": 2276.5142714905005,
|
299 |
+
"learning_rate": 6.56118298974763e-08,
|
300 |
+
"logits/chosen": -2.732027292251587,
|
301 |
+
"logits/rejected": -2.7048563957214355,
|
302 |
+
"logps/chosen": -0.9618963003158569,
|
303 |
+
"logps/rejected": -1.0246574878692627,
|
304 |
+
"loss": 0.5708,
|
305 |
+
"rewards/accuracies": 0.7749999761581421,
|
306 |
+
"rewards/chosen": -0.570631742477417,
|
307 |
+
"rewards/margins": 1.4420315027236938,
|
308 |
+
"rewards/rejected": -2.012662887573242,
|
309 |
"step": 190
|
310 |
},
|
311 |
{
|
312 |
+
"epoch": 0.48,
|
313 |
+
"grad_norm": 1178.2661546943384,
|
314 |
+
"learning_rate": 6.153842270203887e-08,
|
315 |
+
"logits/chosen": -2.6935534477233887,
|
316 |
+
"logits/rejected": -2.6572394371032715,
|
317 |
+
"logps/chosen": -0.8579891920089722,
|
318 |
+
"logps/rejected": -0.931664764881134,
|
319 |
+
"loss": 0.5316,
|
320 |
+
"rewards/accuracies": 0.793749988079071,
|
321 |
+
"rewards/chosen": -0.0793720930814743,
|
322 |
+
"rewards/margins": 1.138346552848816,
|
323 |
+
"rewards/rejected": -1.217718482017517,
|
324 |
"step": 200
|
325 |
},
|
326 |
{
|
327 |
+
"epoch": 0.51,
|
328 |
+
"grad_norm": 913.5130412259963,
|
329 |
+
"learning_rate": 5.738232820012406e-08,
|
330 |
+
"logits/chosen": -2.7250068187713623,
|
331 |
+
"logits/rejected": -2.6632461547851562,
|
332 |
+
"logps/chosen": -1.0246375799179077,
|
333 |
+
"logps/rejected": -0.9633069038391113,
|
334 |
+
"loss": 0.5393,
|
335 |
+
"rewards/accuracies": 0.768750011920929,
|
336 |
+
"rewards/chosen": -0.25627315044403076,
|
337 |
+
"rewards/margins": 1.4161301851272583,
|
338 |
+
"rewards/rejected": -1.672403335571289,
|
339 |
"step": 210
|
340 |
},
|
341 |
{
|
342 |
+
"epoch": 0.53,
|
343 |
+
"grad_norm": 1025.54215312441,
|
344 |
+
"learning_rate": 5.317333003449687e-08,
|
345 |
+
"logits/chosen": -2.733330011367798,
|
346 |
+
"logits/rejected": -2.715299606323242,
|
347 |
+
"logps/chosen": -0.8904998898506165,
|
348 |
+
"logps/rejected": -0.8720332980155945,
|
349 |
+
"loss": 0.5228,
|
350 |
+
"rewards/accuracies": 0.8062499761581421,
|
351 |
+
"rewards/chosen": 0.2184334546327591,
|
352 |
+
"rewards/margins": 1.4306997060775757,
|
353 |
+
"rewards/rejected": -1.212266206741333,
|
354 |
"step": 220
|
355 |
},
|
356 |
{
|
357 |
+
"epoch": 0.56,
|
358 |
+
"grad_norm": 932.0109937677821,
|
359 |
+
"learning_rate": 4.894159096919109e-08,
|
360 |
+
"logits/chosen": -2.6878042221069336,
|
361 |
+
"logits/rejected": -2.65002179145813,
|
362 |
+
"logps/chosen": -0.9498124122619629,
|
363 |
+
"logps/rejected": -0.9380944967269897,
|
364 |
+
"loss": 0.4868,
|
365 |
"rewards/accuracies": 0.75,
|
366 |
+
"rewards/chosen": -0.5880553722381592,
|
367 |
+
"rewards/margins": 1.365509271621704,
|
368 |
+
"rewards/rejected": -1.9535646438598633,
|
369 |
"step": 230
|
370 |
},
|
371 |
{
|
372 |
+
"epoch": 0.58,
|
373 |
+
"grad_norm": 936.3493959909762,
|
374 |
+
"learning_rate": 4.471743673537994e-08,
|
375 |
+
"logits/chosen": -2.7069993019104004,
|
376 |
+
"logits/rejected": -2.6865086555480957,
|
377 |
+
"logps/chosen": -0.9886058568954468,
|
378 |
+
"logps/rejected": -0.9898191690444946,
|
379 |
+
"loss": 0.5347,
|
380 |
+
"rewards/accuracies": 0.7875000238418579,
|
381 |
+
"rewards/chosen": 0.04917572811245918,
|
382 |
+
"rewards/margins": 1.6429868936538696,
|
383 |
+
"rewards/rejected": -1.5938111543655396,
|
384 |
"step": 240
|
385 |
},
|
386 |
{
|
387 |
+
"epoch": 0.6,
|
388 |
+
"grad_norm": 984.7410594483609,
|
389 |
+
"learning_rate": 4.053113870938224e-08,
|
390 |
+
"logits/chosen": -2.758028507232666,
|
391 |
+
"logits/rejected": -2.679352283477783,
|
392 |
+
"logps/chosen": -0.8968666791915894,
|
393 |
+
"logps/rejected": -0.9234074354171753,
|
394 |
+
"loss": 0.5007,
|
395 |
+
"rewards/accuracies": 0.8374999761581421,
|
396 |
+
"rewards/chosen": 0.0006614074227400124,
|
397 |
+
"rewards/margins": 1.3911110162734985,
|
398 |
+
"rewards/rejected": -1.3904496431350708,
|
399 |
"step": 250
|
400 |
},
|
401 |
{
|
402 |
+
"epoch": 0.63,
|
403 |
+
"grad_norm": 2539.435048877582,
|
404 |
+
"learning_rate": 3.641269698018933e-08,
|
405 |
+
"logits/chosen": -2.713007926940918,
|
406 |
+
"logits/rejected": -2.6481966972351074,
|
407 |
+
"logps/chosen": -1.007727861404419,
|
408 |
+
"logps/rejected": -0.9894694089889526,
|
409 |
+
"loss": 0.497,
|
410 |
+
"rewards/accuracies": 0.824999988079071,
|
411 |
+
"rewards/chosen": 0.06799235194921494,
|
412 |
+
"rewards/margins": 1.8265388011932373,
|
413 |
+
"rewards/rejected": -1.7585465908050537,
|
414 |
"step": 260
|
415 |
},
|
416 |
{
|
417 |
+
"epoch": 0.65,
|
418 |
+
"grad_norm": 1438.0528773750598,
|
419 |
+
"learning_rate": 3.2391625361107026e-08,
|
420 |
+
"logits/chosen": -2.7407853603363037,
|
421 |
+
"logits/rejected": -2.6641056537628174,
|
422 |
+
"logps/chosen": -0.9492539167404175,
|
423 |
+
"logps/rejected": -0.9392199516296387,
|
424 |
+
"loss": 0.5391,
|
425 |
"rewards/accuracies": 0.862500011920929,
|
426 |
+
"rewards/chosen": -0.02730640210211277,
|
427 |
+
"rewards/margins": 1.7186416387557983,
|
428 |
+
"rewards/rejected": -1.745948076248169,
|
429 |
"step": 270
|
430 |
},
|
431 |
{
|
432 |
+
"epoch": 0.68,
|
433 |
+
"grad_norm": 1220.7276779461947,
|
434 |
+
"learning_rate": 2.8496739886173992e-08,
|
435 |
+
"logits/chosen": -2.697509288787842,
|
436 |
+
"logits/rejected": -2.640756607055664,
|
437 |
+
"logps/chosen": -0.9139662981033325,
|
438 |
+
"logps/rejected": -0.9015041589736938,
|
439 |
+
"loss": 0.4723,
|
440 |
+
"rewards/accuracies": 0.84375,
|
441 |
+
"rewards/chosen": 0.023266727104783058,
|
442 |
+
"rewards/margins": 1.4177985191345215,
|
443 |
+
"rewards/rejected": -1.3945319652557373,
|
444 |
"step": 280
|
445 |
},
|
446 |
{
|
447 |
+
"epoch": 0.7,
|
448 |
+
"grad_norm": 1542.4514800663226,
|
449 |
+
"learning_rate": 2.4755952307046063e-08,
|
450 |
+
"logits/chosen": -2.742187023162842,
|
451 |
+
"logits/rejected": -2.699744701385498,
|
452 |
+
"logps/chosen": -1.0006037950515747,
|
453 |
+
"logps/rejected": -0.9843395352363586,
|
454 |
+
"loss": 0.448,
|
455 |
+
"rewards/accuracies": 0.831250011920929,
|
456 |
+
"rewards/chosen": 0.14072290062904358,
|
457 |
+
"rewards/margins": 1.5138235092163086,
|
458 |
+
"rewards/rejected": -1.3731005191802979,
|
459 |
"step": 290
|
460 |
},
|
461 |
{
|
462 |
+
"epoch": 0.73,
|
463 |
+
"grad_norm": 1452.2302527315665,
|
464 |
+
"learning_rate": 2.1196070070200995e-08,
|
465 |
+
"logits/chosen": -2.7377541065216064,
|
466 |
+
"logits/rejected": -2.6858716011047363,
|
467 |
+
"logps/chosen": -0.900943398475647,
|
468 |
+
"logps/rejected": -0.9453694224357605,
|
469 |
+
"loss": 0.484,
|
470 |
+
"rewards/accuracies": 0.824999988079071,
|
471 |
+
"rewards/chosen": -0.028856370598077774,
|
472 |
+
"rewards/margins": 1.6425704956054688,
|
473 |
+
"rewards/rejected": -1.671426773071289,
|
474 |
"step": 300
|
475 |
},
|
476 |
{
|
477 |
+
"epoch": 0.75,
|
478 |
+
"grad_norm": 966.038328895732,
|
479 |
+
"learning_rate": 1.7842604207878005e-08,
|
480 |
+
"logits/chosen": -2.7250287532806396,
|
481 |
+
"logits/rejected": -2.6620233058929443,
|
482 |
+
"logps/chosen": -1.0749655961990356,
|
483 |
+
"logps/rejected": -1.015625238418579,
|
484 |
+
"loss": 0.4403,
|
485 |
+
"rewards/accuracies": 0.8062499761581421,
|
486 |
+
"rewards/chosen": -0.16425299644470215,
|
487 |
+
"rewards/margins": 1.6934306621551514,
|
488 |
+
"rewards/rejected": -1.8576834201812744,
|
489 |
"step": 310
|
490 |
},
|
491 |
{
|
492 |
+
"epoch": 0.77,
|
493 |
+
"grad_norm": 1686.580221181831,
|
494 |
+
"learning_rate": 1.4719586519455534e-08,
|
495 |
+
"logits/chosen": -2.728663444519043,
|
496 |
+
"logits/rejected": -2.64876389503479,
|
497 |
+
"logps/chosen": -0.9033769369125366,
|
498 |
+
"logps/rejected": -0.9375128746032715,
|
499 |
+
"loss": 0.4301,
|
500 |
+
"rewards/accuracies": 0.9125000238418579,
|
501 |
+
"rewards/chosen": 0.11283926665782928,
|
502 |
+
"rewards/margins": 2.1201956272125244,
|
503 |
+
"rewards/rejected": -2.0073564052581787,
|
504 |
"step": 320
|
505 |
},
|
506 |
{
|
507 |
+
"epoch": 0.8,
|
508 |
+
"grad_norm": 1227.1950699118374,
|
509 |
+
"learning_rate": 1.18493973533924e-08,
|
510 |
+
"logits/chosen": -2.679917097091675,
|
511 |
+
"logits/rejected": -2.611525297164917,
|
512 |
+
"logps/chosen": -0.9286376237869263,
|
513 |
+
"logps/rejected": -0.9542851448059082,
|
514 |
+
"loss": 0.4554,
|
515 |
+
"rewards/accuracies": 0.800000011920929,
|
516 |
+
"rewards/chosen": -0.18441525101661682,
|
517 |
+
"rewards/margins": 1.7495098114013672,
|
518 |
+
"rewards/rejected": -1.9339250326156616,
|
519 |
"step": 330
|
520 |
},
|
521 |
{
|
522 |
+
"epoch": 0.82,
|
523 |
+
"grad_norm": 1294.7256357947958,
|
524 |
+
"learning_rate": 9.252605223891208e-09,
|
525 |
+
"logits/chosen": -2.759120464324951,
|
526 |
+
"logits/rejected": -2.6892759799957275,
|
527 |
+
"logps/chosen": -0.8945956230163574,
|
528 |
+
"logps/rejected": -0.9347489476203918,
|
529 |
+
"loss": 0.5031,
|
530 |
+
"rewards/accuracies": 0.8125,
|
531 |
+
"rewards/chosen": -0.42182081937789917,
|
532 |
+
"rewards/margins": 1.480148196220398,
|
533 |
+
"rewards/rejected": -1.9019691944122314,
|
534 |
"step": 340
|
535 |
},
|
536 |
{
|
537 |
+
"epoch": 0.85,
|
538 |
+
"grad_norm": 914.832611398372,
|
539 |
+
"learning_rate": 6.947819411632222e-09,
|
540 |
+
"logits/chosen": -2.6895413398742676,
|
541 |
+
"logits/rejected": -2.635143995285034,
|
542 |
+
"logps/chosen": -0.9759384393692017,
|
543 |
+
"logps/rejected": -0.96312016248703,
|
544 |
+
"loss": 0.4451,
|
545 |
+
"rewards/accuracies": 0.831250011920929,
|
546 |
+
"rewards/chosen": -0.2653306722640991,
|
547 |
+
"rewards/margins": 1.628769874572754,
|
548 |
+
"rewards/rejected": -1.894100546836853,
|
549 |
"step": 350
|
550 |
},
|
551 |
{
|
552 |
+
"epoch": 0.87,
|
553 |
+
"grad_norm": 1785.1282881305524,
|
554 |
+
"learning_rate": 4.951556604879048e-09,
|
555 |
+
"logits/chosen": -2.7073302268981934,
|
556 |
+
"logits/rejected": -2.661701202392578,
|
557 |
+
"logps/chosen": -0.9704357385635376,
|
558 |
+
"logps/rejected": -1.0066497325897217,
|
559 |
+
"loss": 0.4535,
|
560 |
+
"rewards/accuracies": 0.7749999761581421,
|
561 |
+
"rewards/chosen": -0.30233365297317505,
|
562 |
+
"rewards/margins": 1.4092557430267334,
|
563 |
+
"rewards/rejected": -1.7115894556045532,
|
564 |
"step": 360
|
565 |
},
|
566 |
{
|
567 |
+
"epoch": 0.89,
|
568 |
+
"grad_norm": 821.6518799466353,
|
569 |
+
"learning_rate": 3.278122536639888e-09,
|
570 |
+
"logits/chosen": -2.7481675148010254,
|
571 |
+
"logits/rejected": -2.692375898361206,
|
572 |
+
"logps/chosen": -0.8963810205459595,
|
573 |
+
"logps/rejected": -0.8914927244186401,
|
574 |
+
"loss": 0.4351,
|
575 |
+
"rewards/accuracies": 0.8374999761581421,
|
576 |
+
"rewards/chosen": 0.013069706968963146,
|
577 |
+
"rewards/margins": 1.6356351375579834,
|
578 |
+
"rewards/rejected": -1.622565507888794,
|
579 |
"step": 370
|
580 |
},
|
581 |
{
|
582 |
+
"epoch": 0.92,
|
583 |
+
"grad_norm": 820.7523726208289,
|
584 |
+
"learning_rate": 1.9395094661033118e-09,
|
585 |
+
"logits/chosen": -2.717128276824951,
|
586 |
+
"logits/rejected": -2.693026542663574,
|
587 |
+
"logps/chosen": -0.9069439172744751,
|
588 |
+
"logps/rejected": -0.9684022068977356,
|
589 |
+
"loss": 0.4381,
|
590 |
+
"rewards/accuracies": 0.831250011920929,
|
591 |
+
"rewards/chosen": -0.17060108482837677,
|
592 |
+
"rewards/margins": 1.5492388010025024,
|
593 |
+
"rewards/rejected": -1.7198398113250732,
|
594 |
"step": 380
|
595 |
},
|
596 |
{
|
597 |
+
"epoch": 0.94,
|
598 |
+
"grad_norm": 791.5724729924565,
|
599 |
+
"learning_rate": 9.453102390227175e-10,
|
600 |
+
"logits/chosen": -2.694126605987549,
|
601 |
+
"logits/rejected": -2.655355453491211,
|
602 |
+
"logps/chosen": -0.9297587275505066,
|
603 |
+
"logps/rejected": -0.9920668601989746,
|
604 |
+
"loss": 0.4482,
|
605 |
+
"rewards/accuracies": 0.856249988079071,
|
606 |
+
"rewards/chosen": -0.0569925419986248,
|
607 |
+
"rewards/margins": 1.6660388708114624,
|
608 |
+
"rewards/rejected": -1.7230314016342163,
|
609 |
"step": 390
|
610 |
},
|
611 |
{
|
612 |
+
"epoch": 0.97,
|
613 |
+
"grad_norm": 1054.605495809102,
|
614 |
+
"learning_rate": 3.0264954291494007e-10,
|
615 |
+
"logits/chosen": -2.7315666675567627,
|
616 |
+
"logits/rejected": -2.6392362117767334,
|
617 |
+
"logps/chosen": -0.970044732093811,
|
618 |
+
"logps/rejected": -0.9285378456115723,
|
619 |
+
"loss": 0.4633,
|
620 |
+
"rewards/accuracies": 0.84375,
|
621 |
+
"rewards/chosen": -0.0074133919551968575,
|
622 |
+
"rewards/margins": 1.79477858543396,
|
623 |
+
"rewards/rejected": -1.802191972732544,
|
624 |
"step": 400
|
625 |
},
|
626 |
{
|
627 |
+
"epoch": 0.99,
|
628 |
+
"grad_norm": 1541.6219713183218,
|
629 |
+
"learning_rate": 1.6132849715988494e-11,
|
630 |
+
"logits/chosen": -2.7301852703094482,
|
631 |
+
"logits/rejected": -2.6915435791015625,
|
632 |
+
"logps/chosen": -0.9321584701538086,
|
633 |
+
"logps/rejected": -0.9507132768630981,
|
634 |
+
"loss": 0.4578,
|
635 |
+
"rewards/accuracies": 0.8687499761581421,
|
636 |
+
"rewards/chosen": -0.2989009618759155,
|
637 |
+
"rewards/margins": 1.854230523109436,
|
638 |
+
"rewards/rejected": -2.1531314849853516,
|
639 |
"step": 410
|
640 |
},
|
641 |
{
|
642 |
+
"epoch": 1.0,
|
643 |
"step": 413,
|
644 |
"total_flos": 0.0,
|
645 |
+
"train_loss": 0.5277958785073232,
|
646 |
+
"train_runtime": 6372.8883,
|
647 |
+
"train_samples_per_second": 8.304,
|
648 |
+
"train_steps_per_second": 0.065
|
649 |
}
|
650 |
],
|
651 |
"logging_steps": 10,
|
|
|
653 |
"num_input_tokens_seen": 0,
|
654 |
"num_train_epochs": 1,
|
655 |
"save_steps": 100,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
656 |
"total_flos": 0.0,
|
657 |
"train_batch_size": 4,
|
658 |
"trial_name": null,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93c9cdc5d47cf48041862ca767083718bee40c0c9d421521bddf246b76bcabfa
|
3 |
size 6264
|