wzhouad commited on
Commit
34c0756
1 Parent(s): 0bfa4e2

Model save

Browse files
README.md CHANGED
@@ -35,7 +35,7 @@ The following hyperparameters were used during training:
35
  - learning_rate: 2e-06
36
  - train_batch_size: 2
37
  - eval_batch_size: 8
38
- - seed: 1
39
  - distributed_type: multi-GPU
40
  - num_devices: 8
41
  - gradient_accumulation_steps: 8
@@ -44,7 +44,7 @@ The following hyperparameters were used during training:
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
  - lr_scheduler_warmup_ratio: 0.1
47
- - num_epochs: 1
48
 
49
  ### Training results
50
 
 
35
  - learning_rate: 2e-06
36
  - train_batch_size: 2
37
  - eval_batch_size: 8
38
+ - seed: 2
39
  - distributed_type: multi-GPU
40
  - num_devices: 8
41
  - gradient_accumulation_steps: 8
 
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
  - lr_scheduler_warmup_ratio: 0.1
47
+ - num_epochs: 2
48
 
49
  ### Training results
50
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 0.30111024614790793,
4
- "train_runtime": 5278.2802,
5
  "train_samples": 45548,
6
- "train_samples_per_second": 8.629,
7
  "train_steps_per_second": 0.067
8
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "train_loss": 0.21065937945960272,
4
+ "train_runtime": 10560.1161,
5
  "train_samples": 45548,
6
+ "train_samples_per_second": 8.626,
7
  "train_steps_per_second": 0.067
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f99564dd0b61c7960459a4d1dfdd645c1a78dc3e9fb3833889cd3c356f999c7
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:017ec8abde50ce1610f6890d47e40e50eff062df000d1fa889ec708d377a3118
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8800f44937f05d718368505c0913363a01793524ed590c7a4de9fbaf4903cda1
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b58c87e610451d748c1d531a107e90bb750f3b5e94c11d0a293007ceaa82ec11
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e8602e8be4a1b909cf92a9245cbaff8682a9966b1ca54cf1713d1447aee48d0
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a95089f9da264ce34eb8ac1c56338154e6e293ee5fb51ab15ee7ba91df63e46
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4caf53fe4f749740b40900d01c4e026065a5cd96a5dc8400797a1228b7dd7149
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bac8791b6da670c1b08dbffd9c1fb56e46109f5846709c905830aa4d5a751715
3
  size 1168138808
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 0.30111024614790793,
4
- "train_runtime": 5278.2802,
5
  "train_samples": 45548,
6
- "train_samples_per_second": 8.629,
7
  "train_steps_per_second": 0.067
8
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "train_loss": 0.21065937945960272,
4
+ "train_runtime": 10560.1161,
5
  "train_samples": 45548,
6
+ "train_samples_per_second": 8.626,
7
  "train_steps_per_second": 0.067
8
  }
trainer_state.json CHANGED
@@ -1,516 +1,1020 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9975412715138743,
5
  "eval_steps": 10000,
6
- "global_step": 355,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03,
13
- "learning_rate": 5.555555555555555e-07,
14
- "logits/chosen": -0.09875188767910004,
15
- "logits/rejected": 0.0006220974028110504,
16
- "logps/chosen": -327.19207763671875,
17
- "logps/rejected": -192.9109344482422,
18
- "loss": 0.5078,
19
- "rewards/accuracies": 0.48750001192092896,
20
- "rewards/chosen": 0.000275815516943112,
21
- "rewards/margins": 0.0012074653059244156,
22
- "rewards/rejected": -0.0009316497598774731,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.06,
27
- "learning_rate": 1.111111111111111e-06,
28
- "logits/chosen": -0.061609845608472824,
29
- "logits/rejected": -0.0017540112603455782,
30
- "logps/chosen": -295.9917297363281,
31
- "logps/rejected": -183.76776123046875,
32
- "loss": 0.5151,
33
- "rewards/accuracies": 0.5625,
34
- "rewards/chosen": 0.007897479459643364,
35
- "rewards/margins": 0.025164177641272545,
36
- "rewards/rejected": -0.01726669631898403,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.08,
41
- "learning_rate": 1.6666666666666667e-06,
42
- "logits/chosen": -0.027067899703979492,
43
- "logits/rejected": 0.09714551270008087,
44
- "logps/chosen": -340.88226318359375,
45
- "logps/rejected": -180.27139282226562,
46
- "loss": 0.5184,
47
  "rewards/accuracies": 0.574999988079071,
48
- "rewards/chosen": 0.03142847493290901,
49
- "rewards/margins": 0.1530241072177887,
50
- "rewards/rejected": -0.12159563601016998,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.11,
55
- "learning_rate": 1.999224195661986e-06,
56
- "logits/chosen": -0.08356816321611404,
57
- "logits/rejected": 0.00294572114944458,
58
- "logps/chosen": -316.5859680175781,
59
- "logps/rejected": -197.39263916015625,
60
- "loss": 0.5192,
61
- "rewards/accuracies": 0.643750011920929,
62
- "rewards/chosen": -0.1317664086818695,
63
- "rewards/margins": 0.20280234515666962,
64
- "rewards/rejected": -0.3345687687397003,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.14,
69
- "learning_rate": 1.9905102152171726e-06,
70
- "logits/chosen": -0.12071399390697479,
71
- "logits/rejected": -0.04788393899798393,
72
- "logps/chosen": -332.5281066894531,
73
- "logps/rejected": -256.2742004394531,
74
- "loss": 0.4751,
75
- "rewards/accuracies": 0.581250011920929,
76
- "rewards/chosen": -0.09092732518911362,
77
- "rewards/margins": 0.18247266113758087,
78
- "rewards/rejected": -0.2734000086784363,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.17,
83
- "learning_rate": 1.9721972279988477e-06,
84
- "logits/chosen": -0.22579865157604218,
85
- "logits/rejected": -0.1116095557808876,
86
- "logps/chosen": -396.06378173828125,
87
- "logps/rejected": -215.12844848632812,
88
- "loss": 0.4328,
89
- "rewards/accuracies": 0.71875,
90
- "rewards/chosen": -0.09131719172000885,
91
- "rewards/margins": 0.4695609211921692,
92
- "rewards/rejected": -0.5608780384063721,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.2,
97
- "learning_rate": 1.9444627046536053e-06,
98
- "logits/chosen": -0.21056826412677765,
99
- "logits/rejected": -0.09725789725780487,
100
- "logps/chosen": -391.73223876953125,
101
- "logps/rejected": -242.24807739257812,
102
- "loss": 0.426,
103
  "rewards/accuracies": 0.6625000238418579,
104
- "rewards/chosen": -0.25291183590888977,
105
- "rewards/margins": 0.42755264043807983,
106
- "rewards/rejected": -0.6804644465446472,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.22,
111
- "learning_rate": 1.907575419670957e-06,
112
- "logits/chosen": -0.15251095592975616,
113
- "logits/rejected": -0.10242275148630142,
114
- "logps/chosen": -286.48260498046875,
115
- "logps/rejected": -209.681884765625,
116
- "loss": 0.3996,
117
- "rewards/accuracies": 0.6187499761581421,
118
- "rewards/chosen": -0.2705255448818207,
119
- "rewards/margins": 0.31923907995224,
120
- "rewards/rejected": -0.5897646546363831,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.25,
125
- "learning_rate": 1.861892846697277e-06,
126
- "logits/chosen": -0.2654028832912445,
127
- "logits/rejected": -0.21472935378551483,
128
- "logps/chosen": -388.46490478515625,
129
- "logps/rejected": -240.60079956054688,
130
- "loss": 0.3895,
131
- "rewards/accuracies": 0.762499988079071,
132
- "rewards/chosen": -0.2387591302394867,
133
- "rewards/margins": 0.526225209236145,
134
- "rewards/rejected": -0.7649842500686646,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.28,
139
- "learning_rate": 1.8078576942687008e-06,
140
- "logits/chosen": -0.28679296374320984,
141
- "logits/rejected": -0.2434661090373993,
142
- "logps/chosen": -391.65081787109375,
143
- "logps/rejected": -258.97772216796875,
144
- "loss": 0.3588,
145
- "rewards/accuracies": 0.65625,
146
- "rewards/chosen": -0.48451095819473267,
147
- "rewards/margins": 0.38693904876708984,
148
- "rewards/rejected": -0.8714500665664673,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.31,
153
- "learning_rate": 1.7459936155350907e-06,
154
- "logits/chosen": -0.35570335388183594,
155
- "logits/rejected": -0.3073640465736389,
156
- "logps/chosen": -344.8442687988281,
157
- "logps/rejected": -265.22894287109375,
158
- "loss": 0.3365,
159
- "rewards/accuracies": 0.675000011920929,
160
- "rewards/chosen": -0.5254753232002258,
161
- "rewards/margins": 0.33020466566085815,
162
- "rewards/rejected": -0.855679988861084,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.34,
167
- "learning_rate": 1.6769001335520179e-06,
168
- "logits/chosen": -0.4098650813102722,
169
- "logits/rejected": -0.3523326814174652,
170
- "logps/chosen": -385.4635314941406,
171
- "logps/rejected": -273.0743408203125,
172
- "loss": 0.3171,
173
- "rewards/accuracies": 0.65625,
174
- "rewards/chosen": -0.6758801937103271,
175
- "rewards/margins": 0.47352728247642517,
176
- "rewards/rejected": -1.1494075059890747,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.37,
181
- "learning_rate": 1.6012468313196084e-06,
182
- "logits/chosen": -0.46511369943618774,
183
- "logits/rejected": -0.414213091135025,
184
- "logps/chosen": -450.9639587402344,
185
- "logps/rejected": -316.42022705078125,
186
- "loss": 0.2859,
187
- "rewards/accuracies": 0.668749988079071,
188
- "rewards/chosen": -1.0311425924301147,
189
- "rewards/margins": 0.6003210544586182,
190
- "rewards/rejected": -1.631463646888733,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.39,
195
- "learning_rate": 1.51976686287243e-06,
196
- "logits/chosen": -0.4082161486148834,
197
- "logits/rejected": -0.3946232795715332,
198
- "logps/chosen": -385.8199768066406,
199
- "logps/rejected": -287.4693603515625,
200
- "loss": 0.2727,
201
- "rewards/accuracies": 0.65625,
202
- "rewards/chosen": -0.8687955737113953,
203
- "rewards/margins": 0.3416849970817566,
204
- "rewards/rejected": -1.2104805707931519,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.42,
209
- "learning_rate": 1.4332498483042636e-06,
210
- "logits/chosen": -0.32807427644729614,
211
- "logits/rejected": -0.34388530254364014,
212
- "logps/chosen": -380.92181396484375,
213
- "logps/rejected": -347.18682861328125,
214
- "loss": 0.2712,
215
- "rewards/accuracies": 0.637499988079071,
216
- "rewards/chosen": -1.110830545425415,
217
- "rewards/margins": 0.5045827627182007,
218
- "rewards/rejected": -1.6154134273529053,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.45,
223
- "learning_rate": 1.3425342215818716e-06,
224
- "logits/chosen": -0.4168078899383545,
225
- "logits/rejected": -0.39402490854263306,
226
- "logps/chosen": -463.38140869140625,
227
- "logps/rejected": -365.8263854980469,
228
- "loss": 0.2538,
229
- "rewards/accuracies": 0.768750011920929,
230
- "rewards/chosen": -1.1672489643096924,
231
- "rewards/margins": 0.6873351335525513,
232
- "rewards/rejected": -1.854583978652954,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.48,
237
- "learning_rate": 1.248499105304894e-06,
238
- "logits/chosen": -0.4047406315803528,
239
- "logits/rejected": -0.4051821827888489,
240
- "logps/chosen": -429.53961181640625,
241
- "logps/rejected": -394.43450927734375,
242
- "loss": 0.2281,
243
- "rewards/accuracies": 0.675000011920929,
244
- "rewards/chosen": -1.3412402868270874,
245
- "rewards/margins": 0.7463828921318054,
246
- "rewards/rejected": -2.087623119354248,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.51,
251
- "learning_rate": 1.1520557911533388e-06,
252
- "logits/chosen": -0.5203784704208374,
253
- "logits/rejected": -0.4828321933746338,
254
- "logps/chosen": -459.611572265625,
255
- "logps/rejected": -371.26312255859375,
256
- "loss": 0.2364,
257
- "rewards/accuracies": 0.59375,
258
- "rewards/chosen": -1.5552146434783936,
259
- "rewards/margins": 0.40483903884887695,
260
- "rewards/rejected": -1.96005380153656,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.53,
265
- "learning_rate": 1.0541389085854176e-06,
266
- "logits/chosen": -0.4102029800415039,
267
- "logits/rejected": -0.36990243196487427,
268
- "logps/chosen": -395.46234130859375,
269
- "logps/rejected": -275.96533203125,
270
- "loss": 0.2695,
271
- "rewards/accuracies": 0.6812499761581421,
272
- "rewards/chosen": -0.7516598701477051,
273
- "rewards/margins": 0.5604479908943176,
274
- "rewards/rejected": -1.312107801437378,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.56,
279
- "learning_rate": 9.556973673696213e-07,
280
- "logits/chosen": -0.44092226028442383,
281
- "logits/rejected": -0.3557354211807251,
282
- "logps/chosen": -399.0208435058594,
283
- "logps/rejected": -312.0525817871094,
284
- "loss": 0.272,
285
- "rewards/accuracies": 0.6000000238418579,
286
- "rewards/chosen": -1.0618005990982056,
287
- "rewards/margins": 0.38875648379325867,
288
- "rewards/rejected": -1.450556993484497,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.59,
293
- "learning_rate": 8.576851617267149e-07,
294
- "logits/chosen": -0.4460463523864746,
295
- "logits/rejected": -0.4336074888706207,
296
- "logps/chosen": -500.722412109375,
297
- "logps/rejected": -389.33233642578125,
298
- "loss": 0.245,
299
- "rewards/accuracies": 0.668749988079071,
300
- "rewards/chosen": -1.3941071033477783,
301
- "rewards/margins": 0.843894362449646,
302
- "rewards/rejected": -2.2380013465881348,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.62,
307
- "learning_rate": 7.610521251984419e-07,
308
- "logits/chosen": -0.4878757894039154,
309
- "logits/rejected": -0.4867759346961975,
310
- "logps/chosen": -469.9151306152344,
311
- "logps/rejected": -378.65228271484375,
312
- "loss": 0.2402,
313
- "rewards/accuracies": 0.6875,
314
- "rewards/chosen": -1.3634860515594482,
315
- "rewards/margins": 0.7561300992965698,
316
- "rewards/rejected": -2.1196160316467285,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.65,
321
- "learning_rate": 6.667347258372558e-07,
322
- "logits/chosen": -0.44877737760543823,
323
- "logits/rejected": -0.4299391210079193,
324
- "logps/chosen": -442.70233154296875,
325
- "logps/rejected": -345.2084655761719,
326
- "loss": 0.215,
327
- "rewards/accuracies": 0.606249988079071,
328
- "rewards/chosen": -1.3881723880767822,
329
- "rewards/margins": 0.553728461265564,
330
- "rewards/rejected": -1.9419008493423462,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.67,
335
- "learning_rate": 5.756469909206334e-07,
336
- "logits/chosen": -0.46246570348739624,
337
- "logits/rejected": -0.3849286139011383,
338
- "logps/chosen": -504.61883544921875,
339
- "logps/rejected": -392.6059875488281,
340
- "loss": 0.1812,
341
- "rewards/accuracies": 0.6000000238418579,
342
- "rewards/chosen": -1.5720837116241455,
343
- "rewards/margins": 0.6570929288864136,
344
- "rewards/rejected": -2.2291767597198486,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.7,
349
- "learning_rate": 4.88671649138311e-07,
350
- "logits/chosen": -0.46375352144241333,
351
- "logits/rejected": -0.41425347328186035,
352
- "logps/chosen": -511.947265625,
353
- "logps/rejected": -397.8071594238281,
354
- "loss": 0.1967,
355
- "rewards/accuracies": 0.706250011920929,
356
- "rewards/chosen": -1.5960875749588013,
357
- "rewards/margins": 0.8036051988601685,
358
- "rewards/rejected": -2.3996925354003906,
359
  "step": 250
360
  },
361
  {
362
  "epoch": 0.73,
363
- "learning_rate": 4.0665157609325563e-07,
364
- "logits/chosen": -0.5020807981491089,
365
- "logits/rejected": -0.4912947714328766,
366
- "logps/chosen": -487.6549377441406,
367
- "logps/rejected": -400.78643798828125,
368
- "loss": 0.2117,
369
  "rewards/accuracies": 0.6812499761581421,
370
- "rewards/chosen": -1.5476312637329102,
371
- "rewards/margins": 0.7439574003219604,
372
- "rewards/rejected": -2.291588544845581,
373
  "step": 260
374
  },
375
  {
376
  "epoch": 0.76,
377
- "learning_rate": 3.303816260177894e-07,
378
- "logits/chosen": -0.4641796052455902,
379
- "logits/rejected": -0.49887222051620483,
380
- "logps/chosen": -459.90106201171875,
381
- "logps/rejected": -416.5171813964844,
382
- "loss": 0.2198,
383
- "rewards/accuracies": 0.606249988079071,
384
- "rewards/chosen": -1.617098093032837,
385
- "rewards/margins": 0.6207329034805298,
386
- "rewards/rejected": -2.237830877304077,
387
  "step": 270
388
  },
389
  {
390
  "epoch": 0.79,
391
- "learning_rate": 2.6060092886346885e-07,
392
- "logits/chosen": -0.39373284578323364,
393
- "logits/rejected": -0.38700538873672485,
394
- "logps/chosen": -497.85321044921875,
395
- "logps/rejected": -401.1759338378906,
396
- "loss": 0.2288,
397
- "rewards/accuracies": 0.7124999761581421,
398
- "rewards/chosen": -1.4469443559646606,
399
- "rewards/margins": 0.7936018705368042,
400
- "rewards/rejected": -2.240546226501465,
401
  "step": 280
402
  },
403
  {
404
  "epoch": 0.81,
405
- "learning_rate": 1.9798572741341148e-07,
406
- "logits/chosen": -0.3860500752925873,
407
- "logits/rejected": -0.3293386399745941,
408
- "logps/chosen": -446.89111328125,
409
- "logps/rejected": -367.6509094238281,
410
- "loss": 0.226,
411
  "rewards/accuracies": 0.706250011920929,
412
- "rewards/chosen": -1.3792494535446167,
413
- "rewards/margins": 0.6391651034355164,
414
- "rewards/rejected": -2.0184144973754883,
415
  "step": 290
416
  },
417
  {
418
  "epoch": 0.84,
419
- "learning_rate": 1.4314282383241095e-07,
420
- "logits/chosen": -0.43343037366867065,
421
- "logits/rejected": -0.4359092116355896,
422
- "logps/chosen": -452.3334045410156,
423
- "logps/rejected": -351.99755859375,
424
- "loss": 0.2277,
425
- "rewards/accuracies": 0.6812499761581421,
426
- "rewards/chosen": -1.0607109069824219,
427
- "rewards/margins": 0.8778518438339233,
428
- "rewards/rejected": -1.9385627508163452,
429
  "step": 300
430
  },
431
  {
432
  "epoch": 0.87,
433
- "learning_rate": 9.660369916414013e-08,
434
- "logits/chosen": -0.4283617436885834,
435
- "logits/rejected": -0.43107232451438904,
436
- "logps/chosen": -437.2748107910156,
437
- "logps/rejected": -371.6737976074219,
438
- "loss": 0.2184,
439
- "rewards/accuracies": 0.643750011920929,
440
- "rewards/chosen": -1.2600263357162476,
441
- "rewards/margins": 0.5959557294845581,
442
- "rewards/rejected": -1.8559820652008057,
443
  "step": 310
444
  },
445
  {
446
  "epoch": 0.9,
447
- "learning_rate": 5.881936276323462e-08,
448
- "logits/chosen": -0.3460317552089691,
449
- "logits/rejected": -0.34460192918777466,
450
- "logps/chosen": -416.1355895996094,
451
- "logps/rejected": -334.4111328125,
452
- "loss": 0.2229,
453
- "rewards/accuracies": 0.612500011920929,
454
- "rewards/chosen": -1.2378016710281372,
455
- "rewards/margins": 0.5023729205131531,
456
- "rewards/rejected": -1.7401745319366455,
457
  "step": 320
458
  },
459
  {
460
  "epoch": 0.93,
461
- "learning_rate": 3.015598157625598e-08,
462
- "logits/chosen": -0.46672359108924866,
463
- "logits/rejected": -0.43555861711502075,
464
- "logps/chosen": -527.4755249023438,
465
- "logps/rejected": -394.07135009765625,
466
- "loss": 0.2113,
467
- "rewards/accuracies": 0.643750011920929,
468
- "rewards/chosen": -1.4215977191925049,
469
- "rewards/margins": 0.7233748435974121,
470
- "rewards/rejected": -2.144972324371338,
471
  "step": 330
472
  },
473
  {
474
  "epoch": 0.96,
475
- "learning_rate": 1.0891331628063882e-08,
476
- "logits/chosen": -0.43030333518981934,
477
- "logits/rejected": -0.39171096682548523,
478
- "logps/chosen": -477.61883544921875,
479
- "logps/rejected": -401.7641906738281,
480
- "loss": 0.2215,
481
- "rewards/accuracies": 0.6875,
482
- "rewards/chosen": -1.4620156288146973,
483
- "rewards/margins": 0.7660819888114929,
484
- "rewards/rejected": -2.228097677230835,
485
  "step": 340
486
  },
487
  {
488
  "epoch": 0.98,
489
- "learning_rate": 1.212106102131849e-09,
490
- "logits/chosen": -0.45886820554733276,
491
- "logits/rejected": -0.4877847731113434,
492
- "logps/chosen": -446.5228576660156,
493
- "logps/rejected": -395.54852294921875,
494
- "loss": 0.2179,
495
- "rewards/accuracies": 0.7124999761581421,
496
- "rewards/chosen": -1.3368722200393677,
497
- "rewards/margins": 0.8557122945785522,
498
- "rewards/rejected": -2.19258451461792,
499
  "step": 350
500
  },
501
  {
502
- "epoch": 1.0,
503
- "step": 355,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
  "total_flos": 0.0,
505
- "train_loss": 0.30111024614790793,
506
- "train_runtime": 5278.2802,
507
- "train_samples_per_second": 8.629,
508
  "train_steps_per_second": 0.067
509
  }
510
  ],
511
  "logging_steps": 10,
512
- "max_steps": 355,
513
- "num_train_epochs": 1,
514
  "save_steps": 10000,
515
  "total_flos": 0.0,
516
  "trial_name": null,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9950825430277486,
5
  "eval_steps": 10000,
6
+ "global_step": 710,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03,
13
+ "learning_rate": 2.8169014084507043e-07,
14
+ "logits/chosen": -0.023287910968065262,
15
+ "logits/rejected": 0.045911647379398346,
16
+ "logps/chosen": -317.10211181640625,
17
+ "logps/rejected": -207.3465118408203,
18
+ "loss": 0.5132,
19
+ "rewards/accuracies": 0.36250001192092896,
20
+ "rewards/chosen": -0.0002548714110162109,
21
+ "rewards/margins": -0.0012714166659861803,
22
+ "rewards/rejected": 0.0010165453422814608,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.06,
27
+ "learning_rate": 5.633802816901409e-07,
28
+ "logits/chosen": -0.046765245497226715,
29
+ "logits/rejected": -0.011256822384893894,
30
+ "logps/chosen": -293.9908752441406,
31
+ "logps/rejected": -202.18402099609375,
32
+ "loss": 0.5075,
33
+ "rewards/accuracies": 0.6312500238418579,
34
+ "rewards/chosen": 0.000761970819439739,
35
+ "rewards/margins": 0.008972947485744953,
36
+ "rewards/rejected": -0.008210976608097553,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.08,
41
+ "learning_rate": 8.450704225352112e-07,
42
+ "logits/chosen": -0.03957567363977432,
43
+ "logits/rejected": 0.012707856483757496,
44
+ "logps/chosen": -362.0414123535156,
45
+ "logps/rejected": -252.41909790039062,
46
+ "loss": 0.5164,
47
  "rewards/accuracies": 0.574999988079071,
48
+ "rewards/chosen": -0.010256086476147175,
49
+ "rewards/margins": 0.03359478712081909,
50
+ "rewards/rejected": -0.04385087639093399,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.11,
55
+ "learning_rate": 1.1267605633802817e-06,
56
+ "logits/chosen": -0.041558656841516495,
57
+ "logits/rejected": 0.009781199507415295,
58
+ "logps/chosen": -295.82379150390625,
59
+ "logps/rejected": -205.8635711669922,
60
+ "loss": 0.5435,
61
+ "rewards/accuracies": 0.612500011920929,
62
+ "rewards/chosen": -0.059349894523620605,
63
+ "rewards/margins": 0.10006687790155411,
64
+ "rewards/rejected": -0.1594167947769165,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.14,
69
+ "learning_rate": 1.408450704225352e-06,
70
+ "logits/chosen": 0.018238263204693794,
71
+ "logits/rejected": 0.06442906707525253,
72
+ "logps/chosen": -377.36151123046875,
73
+ "logps/rejected": -239.4322052001953,
74
+ "loss": 0.5573,
75
+ "rewards/accuracies": 0.65625,
76
+ "rewards/chosen": -0.0925443023443222,
77
+ "rewards/margins": 0.19226112961769104,
78
+ "rewards/rejected": -0.28480541706085205,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.17,
83
+ "learning_rate": 1.6901408450704225e-06,
84
+ "logits/chosen": -0.07315438240766525,
85
+ "logits/rejected": -0.011159000918269157,
86
+ "logps/chosen": -319.10455322265625,
87
+ "logps/rejected": -229.79824829101562,
88
+ "loss": 0.5171,
89
+ "rewards/accuracies": 0.6000000238418579,
90
+ "rewards/chosen": 0.007246834225952625,
91
+ "rewards/margins": 0.14170756936073303,
92
+ "rewards/rejected": -0.13446073234081268,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.2,
97
+ "learning_rate": 1.971830985915493e-06,
98
+ "logits/chosen": -0.11702132225036621,
99
+ "logits/rejected": -0.06476567685604095,
100
+ "logps/chosen": -346.78509521484375,
101
+ "logps/rejected": -251.37466430664062,
102
+ "loss": 0.5065,
103
  "rewards/accuracies": 0.6625000238418579,
104
+ "rewards/chosen": 0.046701572835445404,
105
+ "rewards/margins": 0.21416659653186798,
106
+ "rewards/rejected": -0.16746501624584198,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.22,
111
+ "learning_rate": 1.9990212265199736e-06,
112
+ "logits/chosen": -0.011112675070762634,
113
+ "logits/rejected": 0.0456564836204052,
114
+ "logps/chosen": -316.1672668457031,
115
+ "logps/rejected": -235.3020782470703,
116
+ "loss": 0.5047,
117
+ "rewards/accuracies": 0.5874999761581421,
118
+ "rewards/chosen": -0.035639651119709015,
119
+ "rewards/margins": 0.16450051963329315,
120
+ "rewards/rejected": -0.20014019310474396,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.25,
125
+ "learning_rate": 1.995640271796129e-06,
126
+ "logits/chosen": -0.06501901894807816,
127
+ "logits/rejected": -0.005411559250205755,
128
+ "logps/chosen": -363.4024963378906,
129
+ "logps/rejected": -284.32769775390625,
130
+ "loss": 0.4601,
131
+ "rewards/accuracies": 0.643750011920929,
132
+ "rewards/chosen": 0.007450317498296499,
133
+ "rewards/margins": 0.165993332862854,
134
+ "rewards/rejected": -0.15854302048683167,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.28,
139
+ "learning_rate": 1.9898532207817787e-06,
140
+ "logits/chosen": -0.06524594128131866,
141
+ "logits/rejected": 0.0007957996567711234,
142
+ "logps/chosen": -287.18389892578125,
143
+ "logps/rejected": -225.53396606445312,
144
+ "loss": 0.4422,
145
+ "rewards/accuracies": 0.5562499761581421,
146
+ "rewards/chosen": -0.07524963468313217,
147
+ "rewards/margins": 0.12944354116916656,
148
+ "rewards/rejected": -0.20469316840171814,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.31,
153
+ "learning_rate": 1.9816740586504575e-06,
154
+ "logits/chosen": -0.05093986541032791,
155
+ "logits/rejected": -0.023259857669472694,
156
+ "logps/chosen": -346.22552490234375,
157
+ "logps/rejected": -273.53070068359375,
158
+ "loss": 0.4475,
159
+ "rewards/accuracies": 0.606249988079071,
160
+ "rewards/chosen": -0.15637585520744324,
161
+ "rewards/margins": 0.209220290184021,
162
+ "rewards/rejected": -0.36559611558914185,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.34,
167
+ "learning_rate": 1.971122551428331e-06,
168
+ "logits/chosen": -0.1468496024608612,
169
+ "logits/rejected": -0.08133789896965027,
170
+ "logps/chosen": -362.974609375,
171
+ "logps/rejected": -242.92843627929688,
172
+ "loss": 0.4502,
173
+ "rewards/accuracies": 0.6625000238418579,
174
+ "rewards/chosen": -0.23508784174919128,
175
+ "rewards/margins": 0.3363807797431946,
176
+ "rewards/rejected": -0.571468710899353,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.37,
181
+ "learning_rate": 1.9582241982269803e-06,
182
+ "logits/chosen": -0.18417930603027344,
183
+ "logits/rejected": -0.09607286751270294,
184
+ "logps/chosen": -402.79461669921875,
185
+ "logps/rejected": -280.8565979003906,
186
+ "loss": 0.4381,
187
+ "rewards/accuracies": 0.6625000238418579,
188
+ "rewards/chosen": -0.2887083888053894,
189
+ "rewards/margins": 0.3183668255805969,
190
+ "rewards/rejected": -0.6070752143859863,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.39,
195
+ "learning_rate": 1.9430101696214336e-06,
196
+ "logits/chosen": -0.24745997786521912,
197
+ "logits/rejected": -0.15786592662334442,
198
+ "logps/chosen": -370.55670166015625,
199
+ "logps/rejected": -257.5110168457031,
200
+ "loss": 0.3939,
201
+ "rewards/accuracies": 0.643750011920929,
202
+ "rewards/chosen": -0.29878249764442444,
203
+ "rewards/margins": 0.2513308823108673,
204
+ "rewards/rejected": -0.5501133799552917,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.42,
209
+ "learning_rate": 1.9255172323223463e-06,
210
+ "logits/chosen": -0.16580908000469208,
211
+ "logits/rejected": -0.11968035995960236,
212
+ "logps/chosen": -318.001708984375,
213
+ "logps/rejected": -258.27215576171875,
214
+ "loss": 0.4097,
215
+ "rewards/accuracies": 0.6499999761581421,
216
+ "rewards/chosen": -0.3942390978336334,
217
+ "rewards/margins": 0.2680663764476776,
218
+ "rewards/rejected": -0.662305474281311,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.45,
223
+ "learning_rate": 1.905787660324391e-06,
224
+ "logits/chosen": -0.249535471200943,
225
+ "logits/rejected": -0.19402232766151428,
226
+ "logps/chosen": -375.15972900390625,
227
+ "logps/rejected": -257.4052429199219,
228
+ "loss": 0.3704,
229
+ "rewards/accuracies": 0.59375,
230
+ "rewards/chosen": -0.31844764947891235,
231
+ "rewards/margins": 0.20566639304161072,
232
+ "rewards/rejected": -0.5241140127182007,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.48,
237
+ "learning_rate": 1.8838691327455609e-06,
238
+ "logits/chosen": -0.2230146825313568,
239
+ "logits/rejected": -0.17071188986301422,
240
+ "logps/chosen": -381.15631103515625,
241
+ "logps/rejected": -274.1378173828125,
242
+ "loss": 0.405,
243
+ "rewards/accuracies": 0.625,
244
+ "rewards/chosen": -0.3613168001174927,
245
+ "rewards/margins": 0.29612740874290466,
246
+ "rewards/rejected": -0.6574442386627197,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.51,
251
+ "learning_rate": 1.8598146186042808e-06,
252
+ "logits/chosen": -0.16511419415473938,
253
+ "logits/rejected": -0.11704270541667938,
254
+ "logps/chosen": -397.783203125,
255
+ "logps/rejected": -304.19879150390625,
256
+ "loss": 0.437,
257
+ "rewards/accuracies": 0.668749988079071,
258
+ "rewards/chosen": -0.5645850300788879,
259
+ "rewards/margins": 0.3246172070503235,
260
+ "rewards/rejected": -0.8892022967338562,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.53,
265
+ "learning_rate": 1.8336822488127723e-06,
266
+ "logits/chosen": -0.27783218026161194,
267
+ "logits/rejected": -0.2260020524263382,
268
+ "logps/chosen": -405.33746337890625,
269
+ "logps/rejected": -301.2474365234375,
270
+ "loss": 0.3796,
271
+ "rewards/accuracies": 0.581250011920929,
272
+ "rewards/chosen": -0.5456187725067139,
273
+ "rewards/margins": 0.2841017246246338,
274
+ "rewards/rejected": -0.8297204971313477,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.56,
279
+ "learning_rate": 1.805535175696026e-06,
280
+ "logits/chosen": -0.30949804186820984,
281
+ "logits/rejected": -0.28316643834114075,
282
+ "logps/chosen": -362.0370178222656,
283
+ "logps/rejected": -315.319580078125,
284
+ "loss": 0.3464,
285
+ "rewards/accuracies": 0.6312500238418579,
286
+ "rewards/chosen": -0.5522123575210571,
287
+ "rewards/margins": 0.22715091705322266,
288
+ "rewards/rejected": -0.7793632745742798,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.59,
293
+ "learning_rate": 1.7754414203758602e-06,
294
+ "logits/chosen": -0.28907471895217896,
295
+ "logits/rejected": -0.2575899660587311,
296
+ "logps/chosen": -353.92681884765625,
297
+ "logps/rejected": -277.845458984375,
298
+ "loss": 0.3405,
299
+ "rewards/accuracies": 0.643750011920929,
300
+ "rewards/chosen": -0.6586817502975464,
301
+ "rewards/margins": 0.22525759041309357,
302
+ "rewards/rejected": -0.883939266204834,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.62,
307
+ "learning_rate": 1.7434737083888904e-06,
308
+ "logits/chosen": -0.31758958101272583,
309
+ "logits/rejected": -0.3126702904701233,
310
+ "logps/chosen": -361.3816833496094,
311
+ "logps/rejected": -286.2841796875,
312
+ "loss": 0.3756,
313
+ "rewards/accuracies": 0.581250011920929,
314
+ "rewards/chosen": -0.6541340351104736,
315
+ "rewards/margins": 0.27324938774108887,
316
+ "rewards/rejected": -0.9273834228515625,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.65,
321
+ "learning_rate": 1.709709293935662e-06,
322
+ "logits/chosen": -0.37347474694252014,
323
+ "logits/rejected": -0.3521464467048645,
324
+ "logps/chosen": -406.6834411621094,
325
+ "logps/rejected": -342.6564025878906,
326
+ "loss": 0.3197,
327
+ "rewards/accuracies": 0.625,
328
+ "rewards/chosen": -0.7559819221496582,
329
+ "rewards/margins": 0.2710956037044525,
330
+ "rewards/rejected": -1.0270774364471436,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.67,
335
+ "learning_rate": 1.6742297731856636e-06,
336
+ "logits/chosen": -0.3079659342765808,
337
+ "logits/rejected": -0.2845328748226166,
338
+ "logps/chosen": -397.04522705078125,
339
+ "logps/rejected": -340.85748291015625,
340
+ "loss": 0.2793,
341
+ "rewards/accuracies": 0.6187499761581421,
342
+ "rewards/chosen": -0.8156784176826477,
343
+ "rewards/margins": 0.31112828850746155,
344
+ "rewards/rejected": -1.1268064975738525,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.7,
349
+ "learning_rate": 1.6371208870894001e-06,
350
+ "logits/chosen": -0.2892235517501831,
351
+ "logits/rejected": -0.2505740821361542,
352
+ "logps/chosen": -432.062255859375,
353
+ "logps/rejected": -362.1026306152344,
354
+ "loss": 0.2851,
355
+ "rewards/accuracies": 0.6187499761581421,
356
+ "rewards/chosen": -1.0684902667999268,
357
+ "rewards/margins": 0.2997783124446869,
358
+ "rewards/rejected": -1.368268609046936,
359
  "step": 250
360
  },
361
  {
362
  "epoch": 0.73,
363
+ "learning_rate": 1.5984723141740574e-06,
364
+ "logits/chosen": -0.36091741919517517,
365
+ "logits/rejected": -0.3296750485897064,
366
+ "logps/chosen": -361.447021484375,
367
+ "logps/rejected": -308.85284423828125,
368
+ "loss": 0.3182,
369
  "rewards/accuracies": 0.6812499761581421,
370
+ "rewards/chosen": -0.8450363874435425,
371
+ "rewards/margins": 0.2505396008491516,
372
+ "rewards/rejected": -1.0955758094787598,
373
  "step": 260
374
  },
375
  {
376
  "epoch": 0.76,
377
+ "learning_rate": 1.5583774538234882e-06,
378
+ "logits/chosen": -0.26542288064956665,
379
+ "logits/rejected": -0.2120533287525177,
380
+ "logps/chosen": -402.22021484375,
381
+ "logps/rejected": -305.82586669921875,
382
+ "loss": 0.3267,
383
+ "rewards/accuracies": 0.6499999761581421,
384
+ "rewards/chosen": -0.8468425869941711,
385
+ "rewards/margins": 0.3782137632369995,
386
+ "rewards/rejected": -1.2250562906265259,
387
  "step": 270
388
  },
389
  {
390
  "epoch": 0.79,
391
+ "learning_rate": 1.5169332005662589e-06,
392
+ "logits/chosen": -0.3561258912086487,
393
+ "logits/rejected": -0.3128196597099304,
394
+ "logps/chosen": -451.0619201660156,
395
+ "logps/rejected": -341.95355224609375,
396
+ "loss": 0.3046,
397
+ "rewards/accuracies": 0.731249988079071,
398
+ "rewards/chosen": -0.7824829816818237,
399
+ "rewards/margins": 0.41102856397628784,
400
+ "rewards/rejected": -1.1935116052627563,
401
  "step": 280
402
  },
403
  {
404
  "epoch": 0.81,
405
+ "learning_rate": 1.474239709917218e-06,
406
+ "logits/chosen": -0.32344120740890503,
407
+ "logits/rejected": -0.30548325181007385,
408
+ "logps/chosen": -421.78668212890625,
409
+ "logps/rejected": -358.4979553222656,
410
+ "loss": 0.2834,
411
  "rewards/accuracies": 0.706250011920929,
412
+ "rewards/chosen": -0.8403790593147278,
413
+ "rewards/margins": 0.33261531591415405,
414
+ "rewards/rejected": -1.1729944944381714,
415
  "step": 290
416
  },
417
  {
418
  "epoch": 0.84,
419
+ "learning_rate": 1.430400156338457e-06,
420
+ "logits/chosen": -0.3465970456600189,
421
+ "logits/rejected": -0.3216686546802521,
422
+ "logps/chosen": -420.833740234375,
423
+ "logps/rejected": -353.2898864746094,
424
+ "loss": 0.2659,
425
+ "rewards/accuracies": 0.6312500238418579,
426
+ "rewards/chosen": -0.7795278429985046,
427
+ "rewards/margins": 0.21080787479877472,
428
+ "rewards/rejected": -0.9903356432914734,
429
  "step": 300
430
  },
431
  {
432
  "epoch": 0.87,
433
+ "learning_rate": 1.3855204839045892e-06,
434
+ "logits/chosen": -0.3354475796222687,
435
+ "logits/rejected": -0.35353055596351624,
436
+ "logps/chosen": -389.02545166015625,
437
+ "logps/rejected": -333.0558776855469,
438
+ "loss": 0.2704,
439
+ "rewards/accuracies": 0.59375,
440
+ "rewards/chosen": -1.0605363845825195,
441
+ "rewards/margins": 0.15861138701438904,
442
+ "rewards/rejected": -1.219147801399231,
443
  "step": 310
444
  },
445
  {
446
  "epoch": 0.9,
447
+ "learning_rate": 1.3397091502748927e-06,
448
+ "logits/chosen": -0.39062121510505676,
449
+ "logits/rejected": -0.32264286279678345,
450
+ "logps/chosen": -479.73822021484375,
451
+ "logps/rejected": -373.89080810546875,
452
+ "loss": 0.3049,
453
+ "rewards/accuracies": 0.625,
454
+ "rewards/chosen": -0.9773856997489929,
455
+ "rewards/margins": 0.4283124506473541,
456
+ "rewards/rejected": -1.4056981801986694,
457
  "step": 320
458
  },
459
  {
460
  "epoch": 0.93,
461
+ "learning_rate": 1.2930768645910449e-06,
462
+ "logits/chosen": -0.302212119102478,
463
+ "logits/rejected": -0.2810123860836029,
464
+ "logps/chosen": -395.77923583984375,
465
+ "logps/rejected": -299.944091796875,
466
+ "loss": 0.3347,
467
+ "rewards/accuracies": 0.6937500238418579,
468
+ "rewards/chosen": -0.663515567779541,
469
+ "rewards/margins": 0.3672025799751282,
470
+ "rewards/rejected": -1.0307180881500244,
471
  "step": 330
472
  },
473
  {
474
  "epoch": 0.96,
475
+ "learning_rate": 1.2457363199338495e-06,
476
+ "logits/chosen": -0.2840663194656372,
477
+ "logits/rejected": -0.2467239648103714,
478
+ "logps/chosen": -466.4864196777344,
479
+ "logps/rejected": -333.0160217285156,
480
+ "loss": 0.3193,
481
+ "rewards/accuracies": 0.675000011920929,
482
+ "rewards/chosen": -0.7206543684005737,
483
+ "rewards/margins": 0.4044593274593353,
484
+ "rewards/rejected": -1.1251137256622314,
485
  "step": 340
486
  },
487
  {
488
  "epoch": 0.98,
489
+ "learning_rate": 1.1978019209855173e-06,
490
+ "logits/chosen": -0.32623833417892456,
491
+ "logits/rejected": -0.26132825016975403,
492
+ "logps/chosen": -438.72607421875,
493
+ "logps/rejected": -354.7231140136719,
494
+ "loss": 0.2759,
495
+ "rewards/accuracies": 0.6625000238418579,
496
+ "rewards/chosen": -0.7977033853530884,
497
+ "rewards/margins": 0.4479256272315979,
498
+ "rewards/rejected": -1.245629072189331,
499
  "step": 350
500
  },
501
  {
502
+ "epoch": 1.01,
503
+ "learning_rate": 1.14938950755563e-06,
504
+ "logits/chosen": -0.31017881631851196,
505
+ "logits/rejected": -0.296619713306427,
506
+ "logps/chosen": -379.47064208984375,
507
+ "logps/rejected": -352.1556091308594,
508
+ "loss": 0.2171,
509
+ "rewards/accuracies": 0.6937500238418579,
510
+ "rewards/chosen": -0.9407274127006531,
511
+ "rewards/margins": 0.6167389154434204,
512
+ "rewards/rejected": -1.5574663877487183,
513
+ "step": 360
514
+ },
515
+ {
516
+ "epoch": 1.04,
517
+ "learning_rate": 1.1006160746389332e-06,
518
+ "logits/chosen": -0.45612698793411255,
519
+ "logits/rejected": -0.4245428442955017,
520
+ "logps/chosen": -480.1871032714844,
521
+ "logps/rejected": -501.958740234375,
522
+ "loss": 0.0979,
523
+ "rewards/accuracies": 0.824999988079071,
524
+ "rewards/chosen": -1.4988553524017334,
525
+ "rewards/margins": 1.2996289730072021,
526
+ "rewards/rejected": -2.7984843254089355,
527
+ "step": 370
528
+ },
529
+ {
530
+ "epoch": 1.07,
531
+ "learning_rate": 1.0515994896814731e-06,
532
+ "logits/chosen": -0.32600560784339905,
533
+ "logits/rejected": -0.2724960744380951,
534
+ "logps/chosen": -578.824951171875,
535
+ "logps/rejected": -540.9967651367188,
536
+ "loss": 0.0553,
537
+ "rewards/accuracies": 0.8062499761581421,
538
+ "rewards/chosen": -2.1792449951171875,
539
+ "rewards/margins": 1.2840534448623657,
540
+ "rewards/rejected": -3.463298797607422,
541
+ "step": 380
542
+ },
543
+ {
544
+ "epoch": 1.1,
545
+ "learning_rate": 1.002458207738333e-06,
546
+ "logits/chosen": -0.25963398814201355,
547
+ "logits/rejected": -0.22093424201011658,
548
+ "logps/chosen": -530.6353759765625,
549
+ "logps/rejected": -565.1375122070312,
550
+ "loss": 0.0574,
551
+ "rewards/accuracies": 0.8500000238418579,
552
+ "rewards/chosen": -1.7918068170547485,
553
+ "rewards/margins": 1.450157880783081,
554
+ "rewards/rejected": -3.241964817047119,
555
+ "step": 390
556
+ },
557
+ {
558
+ "epoch": 1.12,
559
+ "learning_rate": 9.533109852113413e-07,
560
+ "logits/chosen": -0.21417060494422913,
561
+ "logits/rejected": -0.18584421277046204,
562
+ "logps/chosen": -536.5563354492188,
563
+ "logps/rejected": -544.37890625,
564
+ "loss": 0.047,
565
+ "rewards/accuracies": 0.8812500238418579,
566
+ "rewards/chosen": -1.9521188735961914,
567
+ "rewards/margins": 1.5740001201629639,
568
+ "rewards/rejected": -3.526118755340576,
569
+ "step": 400
570
+ },
571
+ {
572
+ "epoch": 1.15,
573
+ "learning_rate": 9.042765928585326e-07,
574
+ "logits/chosen": -0.12677066028118134,
575
+ "logits/rejected": -0.11945654451847076,
576
+ "logps/chosen": -547.3245849609375,
577
+ "logps/rejected": -567.9544067382812,
578
+ "loss": 0.0401,
579
+ "rewards/accuracies": 0.831250011920929,
580
+ "rewards/chosen": -2.1687557697296143,
581
+ "rewards/margins": 1.4874060153961182,
582
+ "rewards/rejected": -3.6561615467071533,
583
+ "step": 410
584
+ },
585
+ {
586
+ "epoch": 1.18,
587
+ "learning_rate": 8.554735287689148e-07,
588
+ "logits/chosen": -0.14333295822143555,
589
+ "logits/rejected": -0.09979396313428879,
590
+ "logps/chosen": -634.6866455078125,
591
+ "logps/rejected": -613.4561767578125,
592
+ "loss": 0.0348,
593
+ "rewards/accuracies": 0.831250011920929,
594
+ "rewards/chosen": -2.715937852859497,
595
+ "rewards/margins": 1.36255943775177,
596
+ "rewards/rejected": -4.078497409820557,
597
+ "step": 420
598
+ },
599
+ {
600
+ "epoch": 1.21,
601
+ "learning_rate": 8.070197319961782e-07,
602
+ "logits/chosen": -0.10927991569042206,
603
+ "logits/rejected": -0.020826727151870728,
604
+ "logps/chosen": -580.0132446289062,
605
+ "logps/rejected": -619.2887573242188,
606
+ "loss": 0.0341,
607
+ "rewards/accuracies": 0.800000011920929,
608
+ "rewards/chosen": -2.63529372215271,
609
+ "rewards/margins": 1.3534786701202393,
610
+ "rewards/rejected": -3.988771915435791,
611
+ "step": 430
612
+ },
613
+ {
614
+ "epoch": 1.24,
615
+ "learning_rate": 7.590322975433856e-07,
616
+ "logits/chosen": -0.1185801774263382,
617
+ "logits/rejected": -0.03847898915410042,
618
+ "logps/chosen": -641.1858520507812,
619
+ "logps/rejected": -655.4496459960938,
620
+ "loss": 0.0317,
621
+ "rewards/accuracies": 0.8687499761581421,
622
+ "rewards/chosen": -2.531560182571411,
623
+ "rewards/margins": 1.7570297718048096,
624
+ "rewards/rejected": -4.288589954376221,
625
+ "step": 440
626
+ },
627
+ {
628
+ "epoch": 1.26,
629
+ "learning_rate": 7.116271933874245e-07,
630
+ "logits/chosen": -0.001342842006124556,
631
+ "logits/rejected": -0.04314468055963516,
632
+ "logps/chosen": -561.9754638671875,
633
+ "logps/rejected": -599.1416625976562,
634
+ "loss": 0.0298,
635
+ "rewards/accuracies": 0.831250011920929,
636
+ "rewards/chosen": -2.415828227996826,
637
+ "rewards/margins": 1.5171138048171997,
638
+ "rewards/rejected": -3.9329421520233154,
639
+ "step": 450
640
+ },
641
+ {
642
+ "epoch": 1.29,
643
+ "learning_rate": 6.649189802270652e-07,
644
+ "logits/chosen": -0.042498912662267685,
645
+ "logits/rejected": -0.012727165594696999,
646
+ "logps/chosen": -590.1478881835938,
647
+ "logps/rejected": -614.4903564453125,
648
+ "loss": 0.0297,
649
+ "rewards/accuracies": 0.8062499761581421,
650
+ "rewards/chosen": -2.558908224105835,
651
+ "rewards/margins": 1.5000147819519043,
652
+ "rewards/rejected": -4.05892276763916,
653
+ "step": 460
654
+ },
655
+ {
656
+ "epoch": 1.32,
657
+ "learning_rate": 6.190205346318926e-07,
658
+ "logits/chosen": -0.005924136843532324,
659
+ "logits/rejected": 0.028465991839766502,
660
+ "logps/chosen": -631.77685546875,
661
+ "logps/rejected": -660.1925659179688,
662
+ "loss": 0.0263,
663
+ "rewards/accuracies": 0.84375,
664
+ "rewards/chosen": -2.74247145652771,
665
+ "rewards/margins": 1.6851346492767334,
666
+ "rewards/rejected": -4.427606105804443,
667
+ "step": 470
668
+ },
669
+ {
670
+ "epoch": 1.35,
671
+ "learning_rate": 5.740427762611604e-07,
672
+ "logits/chosen": 0.06077251955866814,
673
+ "logits/rejected": 0.14472587406635284,
674
+ "logps/chosen": -601.584716796875,
675
+ "logps/rejected": -623.94287109375,
676
+ "loss": 0.0282,
677
+ "rewards/accuracies": 0.831250011920929,
678
+ "rewards/chosen": -2.6236908435821533,
679
+ "rewards/margins": 1.5524755716323853,
680
+ "rewards/rejected": -4.176166534423828,
681
+ "step": 480
682
+ },
683
+ {
684
+ "epoch": 1.38,
685
+ "learning_rate": 5.300943998117749e-07,
686
+ "logits/chosen": 0.058325447142124176,
687
+ "logits/rejected": 0.08030878007411957,
688
+ "logps/chosen": -585.04052734375,
689
+ "logps/rejected": -604.9239501953125,
690
+ "loss": 0.0261,
691
+ "rewards/accuracies": 0.84375,
692
+ "rewards/chosen": -2.509859561920166,
693
+ "rewards/margins": 1.6018766164779663,
694
+ "rewards/rejected": -4.111736297607422,
695
+ "step": 490
696
+ },
697
+ {
698
+ "epoch": 1.4,
699
+ "learning_rate": 4.872816123431976e-07,
700
+ "logits/chosen": 0.1183939203619957,
701
+ "logits/rejected": 0.08584292232990265,
702
+ "logps/chosen": -526.6102294921875,
703
+ "logps/rejected": -640.744873046875,
704
+ "loss": 0.0244,
705
+ "rewards/accuracies": 0.8812500238418579,
706
+ "rewards/chosen": -2.509054660797119,
707
+ "rewards/margins": 1.6073827743530273,
708
+ "rewards/rejected": -4.1164374351501465,
709
+ "step": 500
710
+ },
711
+ {
712
+ "epoch": 1.43,
713
+ "learning_rate": 4.4570787661405e-07,
714
+ "logits/chosen": 0.16773128509521484,
715
+ "logits/rejected": 0.2288293093442917,
716
+ "logps/chosen": -553.6146850585938,
717
+ "logps/rejected": -577.7164306640625,
718
+ "loss": 0.0246,
719
+ "rewards/accuracies": 0.768750011920929,
720
+ "rewards/chosen": -2.75661039352417,
721
+ "rewards/margins": 1.317345380783081,
722
+ "rewards/rejected": -4.073955535888672,
723
+ "step": 510
724
+ },
725
+ {
726
+ "epoch": 1.46,
727
+ "learning_rate": 4.0547366105068347e-07,
728
+ "logits/chosen": 0.2202252447605133,
729
+ "logits/rejected": 0.26597005128860474,
730
+ "logps/chosen": -630.9942626953125,
731
+ "logps/rejected": -691.7838134765625,
732
+ "loss": 0.0242,
733
+ "rewards/accuracies": 0.831250011920929,
734
+ "rewards/chosen": -2.9170384407043457,
735
+ "rewards/margins": 1.8351190090179443,
736
+ "rewards/rejected": -4.752157688140869,
737
+ "step": 520
738
+ },
739
+ {
740
+ "epoch": 1.49,
741
+ "learning_rate": 3.666761969519528e-07,
742
+ "logits/chosen": 0.17129948735237122,
743
+ "logits/rejected": 0.2111537903547287,
744
+ "logps/chosen": -559.1640625,
745
+ "logps/rejected": -672.8851928710938,
746
+ "loss": 0.0224,
747
+ "rewards/accuracies": 0.8125,
748
+ "rewards/chosen": -2.7882485389709473,
749
+ "rewards/margins": 1.6843712329864502,
750
+ "rewards/rejected": -4.472619533538818,
751
+ "step": 530
752
+ },
753
+ {
754
+ "epoch": 1.52,
755
+ "learning_rate": 3.2940924351693213e-07,
756
+ "logits/chosen": 0.19039176404476166,
757
+ "logits/rejected": 0.21573393046855927,
758
+ "logps/chosen": -628.3604736328125,
759
+ "logps/rejected": -705.090087890625,
760
+ "loss": 0.0211,
761
+ "rewards/accuracies": 0.8125,
762
+ "rewards/chosen": -2.726500988006592,
763
+ "rewards/margins": 1.8815057277679443,
764
+ "rewards/rejected": -4.608006954193115,
765
+ "step": 540
766
+ },
767
+ {
768
+ "epoch": 1.55,
769
+ "learning_rate": 2.937628612634184e-07,
770
+ "logits/chosen": 0.23304316401481628,
771
+ "logits/rejected": 0.2361479252576828,
772
+ "logps/chosen": -604.546630859375,
773
+ "logps/rejected": -636.0623779296875,
774
+ "loss": 0.0197,
775
+ "rewards/accuracies": 0.84375,
776
+ "rewards/chosen": -2.8572418689727783,
777
+ "rewards/margins": 1.691147804260254,
778
+ "rewards/rejected": -4.548389911651611,
779
+ "step": 550
780
+ },
781
+ {
782
+ "epoch": 1.57,
783
+ "learning_rate": 2.598231943847916e-07,
784
+ "logits/chosen": 0.19534823298454285,
785
+ "logits/rejected": 0.2395998239517212,
786
+ "logps/chosen": -624.6097412109375,
787
+ "logps/rejected": -710.4221801757812,
788
+ "loss": 0.0194,
789
+ "rewards/accuracies": 0.84375,
790
+ "rewards/chosen": -2.994528293609619,
791
+ "rewards/margins": 1.8797197341918945,
792
+ "rewards/rejected": -4.8742475509643555,
793
+ "step": 560
794
+ },
795
+ {
796
+ "epoch": 1.6,
797
+ "learning_rate": 2.276722625711861e-07,
798
+ "logits/chosen": 0.11042364686727524,
799
+ "logits/rejected": 0.18681105971336365,
800
+ "logps/chosen": -684.1854248046875,
801
+ "logps/rejected": -786.4215087890625,
802
+ "loss": 0.0194,
803
+ "rewards/accuracies": 0.8812500238418579,
804
+ "rewards/chosen": -3.0811171531677246,
805
+ "rewards/margins": 2.1122710704803467,
806
+ "rewards/rejected": -5.19338846206665,
807
+ "step": 570
808
+ },
809
+ {
810
+ "epoch": 1.63,
811
+ "learning_rate": 1.973877627980699e-07,
812
+ "logits/chosen": 0.11730021238327026,
813
+ "logits/rejected": 0.1679680496454239,
814
+ "logps/chosen": -716.9166259765625,
815
+ "logps/rejected": -759.8760375976562,
816
+ "loss": 0.0171,
817
+ "rewards/accuracies": 0.84375,
818
+ "rewards/chosen": -3.1296448707580566,
819
+ "rewards/margins": 1.810140609741211,
820
+ "rewards/rejected": -4.939785480499268,
821
+ "step": 580
822
+ },
823
+ {
824
+ "epoch": 1.66,
825
+ "learning_rate": 1.6904288156123636e-07,
826
+ "logits/chosen": 0.25147971510887146,
827
+ "logits/rejected": 0.24109426140785217,
828
+ "logps/chosen": -578.8203735351562,
829
+ "logps/rejected": -653.1304321289062,
830
+ "loss": 0.0179,
831
+ "rewards/accuracies": 0.8374999761581421,
832
+ "rewards/chosen": -2.8619461059570312,
833
+ "rewards/margins": 1.7370202541351318,
834
+ "rewards/rejected": -4.598966598510742,
835
+ "step": 590
836
+ },
837
+ {
838
+ "epoch": 1.69,
839
+ "learning_rate": 1.4270611801196642e-07,
840
+ "logits/chosen": 0.10723473876714706,
841
+ "logits/rejected": 0.1447157859802246,
842
+ "logps/chosen": -672.2132568359375,
843
+ "logps/rejected": -709.0341796875,
844
+ "loss": 0.0178,
845
+ "rewards/accuracies": 0.8687499761581421,
846
+ "rewards/chosen": -3.1768195629119873,
847
+ "rewards/margins": 1.8273181915283203,
848
+ "rewards/rejected": -5.0041375160217285,
849
+ "step": 600
850
+ },
851
+ {
852
+ "epoch": 1.71,
853
+ "learning_rate": 1.1844111841977633e-07,
854
+ "logits/chosen": 0.2875896990299225,
855
+ "logits/rejected": 0.30075111985206604,
856
+ "logps/chosen": -604.6449584960938,
857
+ "logps/rejected": -719.7764892578125,
858
+ "loss": 0.0192,
859
+ "rewards/accuracies": 0.862500011920929,
860
+ "rewards/chosen": -2.9635517597198486,
861
+ "rewards/margins": 1.8338435888290405,
862
+ "rewards/rejected": -4.797394752502441,
863
+ "step": 610
864
+ },
865
+ {
866
+ "epoch": 1.74,
867
+ "learning_rate": 9.630652236279625e-08,
868
+ "logits/chosen": 0.221513032913208,
869
+ "logits/rejected": 0.3305627405643463,
870
+ "logps/chosen": -552.0277099609375,
871
+ "logps/rejected": -626.5653076171875,
872
+ "loss": 0.0202,
873
+ "rewards/accuracies": 0.8062499761581421,
874
+ "rewards/chosen": -2.8746533393859863,
875
+ "rewards/margins": 1.538907766342163,
876
+ "rewards/rejected": -4.4135613441467285,
877
+ "step": 620
878
+ },
879
+ {
880
+ "epoch": 1.77,
881
+ "learning_rate": 7.63558210174814e-08,
882
+ "logits/chosen": 0.1648671180009842,
883
+ "logits/rejected": 0.23953859508037567,
884
+ "logps/chosen": -628.3140869140625,
885
+ "logps/rejected": -684.0084228515625,
886
+ "loss": 0.0207,
887
+ "rewards/accuracies": 0.800000011920929,
888
+ "rewards/chosen": -3.077697277069092,
889
+ "rewards/margins": 1.5620543956756592,
890
+ "rewards/rejected": -4.639751434326172,
891
+ "step": 630
892
+ },
893
+ {
894
+ "epoch": 1.8,
895
+ "learning_rate": 5.8637227890115273e-08,
896
+ "logits/chosen": 0.23085036873817444,
897
+ "logits/rejected": 0.26288902759552,
898
+ "logps/chosen": -644.9296875,
899
+ "logps/rejected": -711.1282348632812,
900
+ "loss": 0.0211,
901
+ "rewards/accuracies": 0.8500000238418579,
902
+ "rewards/chosen": -2.8837387561798096,
903
+ "rewards/margins": 1.888494849205017,
904
+ "rewards/rejected": -4.772233963012695,
905
+ "step": 640
906
+ },
907
+ {
908
+ "epoch": 1.83,
909
+ "learning_rate": 4.3193562302499046e-08,
910
+ "logits/chosen": 0.21587777137756348,
911
+ "logits/rejected": 0.3251447081565857,
912
+ "logps/chosen": -587.2296752929688,
913
+ "logps/rejected": -655.6583251953125,
914
+ "loss": 0.0202,
915
+ "rewards/accuracies": 0.8062499761581421,
916
+ "rewards/chosen": -2.720548629760742,
917
+ "rewards/margins": 1.5969436168670654,
918
+ "rewards/rejected": -4.317492485046387,
919
+ "step": 650
920
+ },
921
+ {
922
+ "epoch": 1.85,
923
+ "learning_rate": 3.006214591340339e-08,
924
+ "logits/chosen": 0.20460787415504456,
925
+ "logits/rejected": 0.23312047123908997,
926
+ "logps/chosen": -648.768798828125,
927
+ "logps/rejected": -724.6717529296875,
928
+ "loss": 0.0191,
929
+ "rewards/accuracies": 0.8187500238418579,
930
+ "rewards/chosen": -2.980959177017212,
931
+ "rewards/margins": 1.9149051904678345,
932
+ "rewards/rejected": -4.895864009857178,
933
+ "step": 660
934
+ },
935
+ {
936
+ "epoch": 1.88,
937
+ "learning_rate": 1.9274712525847447e-08,
938
+ "logits/chosen": 0.28748980164527893,
939
+ "logits/rejected": 0.27773481607437134,
940
+ "logps/chosen": -597.6702880859375,
941
+ "logps/rejected": -656.3522338867188,
942
+ "loss": 0.0206,
943
+ "rewards/accuracies": 0.8125,
944
+ "rewards/chosen": -2.8257060050964355,
945
+ "rewards/margins": 1.6687313318252563,
946
+ "rewards/rejected": -4.494436740875244,
947
+ "step": 670
948
+ },
949
+ {
950
+ "epoch": 1.91,
951
+ "learning_rate": 1.0857331398169577e-08,
952
+ "logits/chosen": 0.19040969014167786,
953
+ "logits/rejected": 0.18281084299087524,
954
+ "logps/chosen": -629.0142822265625,
955
+ "logps/rejected": -732.8785400390625,
956
+ "loss": 0.0189,
957
+ "rewards/accuracies": 0.862500011920929,
958
+ "rewards/chosen": -2.8325679302215576,
959
+ "rewards/margins": 2.0608155727386475,
960
+ "rewards/rejected": -4.893383979797363,
961
+ "step": 680
962
+ },
963
+ {
964
+ "epoch": 1.94,
965
+ "learning_rate": 4.830344244220686e-09,
966
+ "logits/chosen": 0.2010643184185028,
967
+ "logits/rejected": 0.30202826857566833,
968
+ "logps/chosen": -644.9835815429688,
969
+ "logps/rejected": -706.9172973632812,
970
+ "loss": 0.0224,
971
+ "rewards/accuracies": 0.84375,
972
+ "rewards/chosen": -3.023210287094116,
973
+ "rewards/margins": 1.628251314163208,
974
+ "rewards/rejected": -4.651461601257324,
975
+ "step": 690
976
+ },
977
+ {
978
+ "epoch": 1.97,
979
+ "learning_rate": 1.2083160749236653e-09,
980
+ "logits/chosen": 0.16012658178806305,
981
+ "logits/rejected": 0.22160223126411438,
982
+ "logps/chosen": -615.0055541992188,
983
+ "logps/rejected": -672.9305419921875,
984
+ "loss": 0.0211,
985
+ "rewards/accuracies": 0.831250011920929,
986
+ "rewards/chosen": -2.802346706390381,
987
+ "rewards/margins": 1.7504093647003174,
988
+ "rewards/rejected": -4.552755832672119,
989
+ "step": 700
990
+ },
991
+ {
992
+ "epoch": 2.0,
993
+ "learning_rate": 0.0,
994
+ "logits/chosen": 0.26305443048477173,
995
+ "logits/rejected": 0.27064579725265503,
996
+ "logps/chosen": -540.3432006835938,
997
+ "logps/rejected": -643.52099609375,
998
+ "loss": 0.0194,
999
+ "rewards/accuracies": 0.856249988079071,
1000
+ "rewards/chosen": -2.756078004837036,
1001
+ "rewards/margins": 1.6126619577407837,
1002
+ "rewards/rejected": -4.368740081787109,
1003
+ "step": 710
1004
+ },
1005
+ {
1006
+ "epoch": 2.0,
1007
+ "step": 710,
1008
  "total_flos": 0.0,
1009
+ "train_loss": 0.21065937945960272,
1010
+ "train_runtime": 10560.1161,
1011
+ "train_samples_per_second": 8.626,
1012
  "train_steps_per_second": 0.067
1013
  }
1014
  ],
1015
  "logging_steps": 10,
1016
+ "max_steps": 710,
1017
+ "num_train_epochs": 2,
1018
  "save_steps": 10000,
1019
  "total_flos": 0.0,
1020
  "trial_name": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d0b6200b167da15766e0f1c1654349573916e6e47dd7b4ffd0acb38743edddd
3
  size 6648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:896a531bb106e3c3ac0c19175569fde1dd12c8fcc0ef3098c749a00ff2d0b2f1
3
  size 6648