ghemdd committed on
Commit
5a6ed88
1 Parent(s): 6e23fd9

Model save

Browse files
README.md ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - trl
6
+ - dpo
7
+ - generated_from_trainer
8
+ base_model: apple/OpenELM-450M
9
+ model-index:
10
+ - name: OpenELM-450M_lora
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # OpenELM-450M_lora
18
+
19
+ This model is a PEFT adapter fine-tuned with DPO (via TRL) from [apple/OpenELM-450M](https://huggingface.co/apple/OpenELM-450M). The training dataset was not recorded by the Trainer.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.6940
22
+ - Rewards/chosen: 0.0062
23
+ - Rewards/rejected: 0.0064
24
+ - Rewards/accuracies: 0.4748
25
+ - Rewards/margins: -0.0002
26
+ - Logps/rejected: -567.8893
27
+ - Logps/chosen: -579.9698
28
+ - Logits/rejected: -11.8584
29
+ - Logits/chosen: -12.0367
30
+
31
+ ## Model description
32
+
33
+ More information needed
34
+
35
+ ## Intended uses & limitations
36
+
37
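+ More information needed. Note that the evaluation loss (0.6940) is essentially ln 2 ≈ 0.6931, the reward accuracy (0.4748) is below 0.5, and the reward margin is slightly negative (-0.0002), so this checkpoint shows no measurable preference learning on the evaluation set.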
38
+
39
+ ## Training and evaluation data
40
+
41
+ More information needed
42
+
43
+ ## Training procedure
44
+
45
+ ### Training hyperparameters
46
+
47
+ The following hyperparameters were used during training:
48
+ - learning_rate: 5e-07
49
+ - train_batch_size: 8
50
+ - eval_batch_size: 8
51
+ - seed: 42
52
+ - gradient_accumulation_steps: 8
53
+ - total_train_batch_size: 64
54
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
+ - lr_scheduler_type: cosine
56
+ - lr_scheduler_warmup_ratio: 0.1
57
+ - num_epochs: 1
58
+
59
+ ### Training results
60
+
61
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
+ | 0.6943 | 0.8975 | 300 | 0.6940 | 0.0062 | 0.0064 | 0.4748 | -0.0002 | -567.8893 | -579.9698 | -11.8584 | -12.0367 |
64
+
65
+
66
+ ### Framework versions
67
+
68
+ - PEFT 0.11.0
69
+ - Transformers 4.40.2
70
+ - Pytorch 2.3.0+cu121
71
+ - Datasets 2.19.1
72
+ - Tokenizers 0.19.1
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dfc06585887b8ffff4b74c2b39fef961049b488b2fff0fe62ec5a439793bc84
3
  size 7670448
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac4dd479051fdc10ffeda881ae3924cd935a5ac91f0d54c722462201c441f21a
3
  size 7670448
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9992520568436799,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.6938572500994106,
5
+ "train_runtime": 20825.8975,
6
+ "train_samples": 21390,
7
+ "train_samples_per_second": 1.027,
8
+ "train_steps_per_second": 0.016
9
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9992520568436799,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.6938572500994106,
5
+ "train_runtime": 20825.8975,
6
+ "train_samples": 21390,
7
+ "train_samples_per_second": 1.027,
8
+ "train_steps_per_second": 0.016
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,556 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9992520568436799,
5
+ "eval_steps": 300,
6
+ "global_step": 334,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0029917726252804786,
13
+ "grad_norm": 0.515625,
14
+ "learning_rate": 1.4705882352941176e-08,
15
+ "logits/chosen": -11.9029541015625,
16
+ "logits/rejected": -11.867537498474121,
17
+ "logps/chosen": -446.77239990234375,
18
+ "logps/rejected": -451.92742919921875,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 1
25
+ },
26
+ {
27
+ "epoch": 0.029917726252804786,
28
+ "grad_norm": 2.84375,
29
+ "learning_rate": 1.4705882352941175e-07,
30
+ "logits/chosen": -11.970257759094238,
31
+ "logits/rejected": -11.80525016784668,
32
+ "logps/chosen": -548.0371704101562,
33
+ "logps/rejected": -528.268798828125,
34
+ "loss": 0.693,
35
+ "rewards/accuracies": 0.4184027910232544,
36
+ "rewards/chosen": 0.0011305785737931728,
37
+ "rewards/margins": 0.0008499751565977931,
38
+ "rewards/rejected": 0.0002806035045068711,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.05983545250560957,
43
+ "grad_norm": 1.484375,
44
+ "learning_rate": 2.941176470588235e-07,
45
+ "logits/chosen": -12.091507911682129,
46
+ "logits/rejected": -11.931904792785645,
47
+ "logps/chosen": -605.2603149414062,
48
+ "logps/rejected": -594.6846923828125,
49
+ "loss": 0.6946,
50
+ "rewards/accuracies": 0.4781250059604645,
51
+ "rewards/chosen": -0.0016426166985183954,
52
+ "rewards/margins": -0.001760849030688405,
53
+ "rewards/rejected": 0.00011823121167253703,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.08975317875841436,
58
+ "grad_norm": 0.8125,
59
+ "learning_rate": 4.4117647058823526e-07,
60
+ "logits/chosen": -12.251832962036133,
61
+ "logits/rejected": -11.93907356262207,
62
+ "logps/chosen": -627.1741333007812,
63
+ "logps/rejected": -592.0576782226562,
64
+ "loss": 0.6927,
65
+ "rewards/accuracies": 0.4765625,
66
+ "rewards/chosen": -0.013599636033177376,
67
+ "rewards/margins": 0.001697429222986102,
68
+ "rewards/rejected": -0.015297065488994122,
69
+ "step": 30
70
+ },
71
+ {
72
+ "epoch": 0.11967090501121914,
73
+ "grad_norm": 0.7734375,
74
+ "learning_rate": 4.995066821070679e-07,
75
+ "logits/chosen": -12.038887023925781,
76
+ "logits/rejected": -11.891900062561035,
77
+ "logps/chosen": -573.0090942382812,
78
+ "logps/rejected": -555.4964599609375,
79
+ "loss": 0.6966,
80
+ "rewards/accuracies": 0.4390625059604645,
81
+ "rewards/chosen": -0.009484687820076942,
82
+ "rewards/margins": -0.005814659409224987,
83
+ "rewards/rejected": -0.0036700288765132427,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.14958863126402394,
88
+ "grad_norm": 1.1796875,
89
+ "learning_rate": 4.964990092676262e-07,
90
+ "logits/chosen": -12.046560287475586,
91
+ "logits/rejected": -11.745036125183105,
92
+ "logps/chosen": -487.1591796875,
93
+ "logps/rejected": -467.3463439941406,
94
+ "loss": 0.6961,
95
+ "rewards/accuracies": 0.4625000059604645,
96
+ "rewards/chosen": -0.014206953346729279,
97
+ "rewards/margins": -0.0039894962683320045,
98
+ "rewards/rejected": -0.010217458009719849,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.17950635751682872,
103
+ "grad_norm": 0.65234375,
104
+ "learning_rate": 4.907906416994145e-07,
105
+ "logits/chosen": -11.951323509216309,
106
+ "logits/rejected": -11.81787109375,
107
+ "logps/chosen": -524.3377685546875,
108
+ "logps/rejected": -518.5364990234375,
109
+ "loss": 0.6951,
110
+ "rewards/accuracies": 0.4906249940395355,
111
+ "rewards/chosen": -0.007045179605484009,
112
+ "rewards/margins": -0.0024539525620639324,
113
+ "rewards/rejected": -0.004591226577758789,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.2094240837696335,
118
+ "grad_norm": 0.7109375,
119
+ "learning_rate": 4.824441214720628e-07,
120
+ "logits/chosen": -12.155089378356934,
121
+ "logits/rejected": -11.935697555541992,
122
+ "logps/chosen": -614.5479736328125,
123
+ "logps/rejected": -598.7986450195312,
124
+ "loss": 0.694,
125
+ "rewards/accuracies": 0.4921875,
126
+ "rewards/chosen": -0.004119172692298889,
127
+ "rewards/margins": -0.0007487249677069485,
128
+ "rewards/rejected": -0.003370448248460889,
129
+ "step": 70
130
+ },
131
+ {
132
+ "epoch": 0.2393418100224383,
133
+ "grad_norm": 1.125,
134
+ "learning_rate": 4.7155089480780365e-07,
135
+ "logits/chosen": -12.310976028442383,
136
+ "logits/rejected": -11.926130294799805,
137
+ "logps/chosen": -590.0772705078125,
138
+ "logps/rejected": -552.1112060546875,
139
+ "loss": 0.694,
140
+ "rewards/accuracies": 0.4703125059604645,
141
+ "rewards/chosen": -0.014592505991458893,
142
+ "rewards/margins": -0.000543795176781714,
143
+ "rewards/rejected": -0.014048713259398937,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 0.26925953627524307,
148
+ "grad_norm": 1.7890625,
149
+ "learning_rate": 4.582303101775248e-07,
150
+ "logits/chosen": -12.02568244934082,
151
+ "logits/rejected": -11.863547325134277,
152
+ "logps/chosen": -596.1324462890625,
153
+ "logps/rejected": -583.9989624023438,
154
+ "loss": 0.6946,
155
+ "rewards/accuracies": 0.4625000059604645,
156
+ "rewards/chosen": -0.00669059157371521,
157
+ "rewards/margins": -0.0019042941275984049,
158
+ "rewards/rejected": -0.004786298610270023,
159
+ "step": 90
160
+ },
161
+ {
162
+ "epoch": 0.2991772625280479,
163
+ "grad_norm": 1.1015625,
164
+ "learning_rate": 4.426283106939473e-07,
165
+ "logits/chosen": -12.22739028930664,
166
+ "logits/rejected": -11.815451622009277,
167
+ "logps/chosen": -604.3762817382812,
168
+ "logps/rejected": -574.3903198242188,
169
+ "loss": 0.6949,
170
+ "rewards/accuracies": 0.4828124940395355,
171
+ "rewards/chosen": 0.0030080166179686785,
172
+ "rewards/margins": -0.0021901517175137997,
173
+ "rewards/rejected": 0.0051981681026518345,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.32909498878085264,
178
+ "grad_norm": 1.453125,
179
+ "learning_rate": 4.249158351283413e-07,
180
+ "logits/chosen": -12.154890060424805,
181
+ "logits/rejected": -11.859006881713867,
182
+ "logps/chosen": -524.4495849609375,
183
+ "logps/rejected": -509.00946044921875,
184
+ "loss": 0.6954,
185
+ "rewards/accuracies": 0.48124998807907104,
186
+ "rewards/chosen": 0.002577757928520441,
187
+ "rewards/margins": -0.002684831153601408,
188
+ "rewards/rejected": 0.005262589547783136,
189
+ "step": 110
190
+ },
191
+ {
192
+ "epoch": 0.35901271503365745,
193
+ "grad_norm": 3.0625,
194
+ "learning_rate": 4.0528694506957754e-07,
195
+ "logits/chosen": -12.192368507385254,
196
+ "logits/rejected": -12.028105735778809,
197
+ "logps/chosen": -528.8504028320312,
198
+ "logps/rejected": -517.0180053710938,
199
+ "loss": 0.693,
200
+ "rewards/accuracies": 0.5015624761581421,
201
+ "rewards/chosen": 0.007784114684909582,
202
+ "rewards/margins": 0.0019344612956047058,
203
+ "rewards/rejected": 0.005849653389304876,
204
+ "step": 120
205
+ },
206
+ {
207
+ "epoch": 0.3889304412864622,
208
+ "grad_norm": 1.0546875,
209
+ "learning_rate": 3.839566987447491e-07,
210
+ "logits/chosen": -12.14574146270752,
211
+ "logits/rejected": -12.06743335723877,
212
+ "logps/chosen": -646.7266845703125,
213
+ "logps/rejected": -643.1847534179688,
214
+ "loss": 0.6923,
215
+ "rewards/accuracies": 0.4906249940395355,
216
+ "rewards/chosen": -0.00342937046661973,
217
+ "rewards/margins": 0.0035477778874337673,
218
+ "rewards/rejected": -0.006977148354053497,
219
+ "step": 130
220
+ },
221
+ {
222
+ "epoch": 0.418848167539267,
223
+ "grad_norm": 0.7421875,
224
+ "learning_rate": 3.6115879479623183e-07,
225
+ "logits/chosen": -11.896639823913574,
226
+ "logits/rejected": -11.759775161743164,
227
+ "logps/chosen": -548.5411376953125,
228
+ "logps/rejected": -533.6222534179688,
229
+ "loss": 0.6943,
230
+ "rewards/accuracies": 0.4984374940395355,
231
+ "rewards/chosen": -0.006624075584113598,
232
+ "rewards/margins": -0.00133815489243716,
233
+ "rewards/rejected": -0.005285919643938541,
234
+ "step": 140
235
+ },
236
+ {
237
+ "epoch": 0.4487658937920718,
238
+ "grad_norm": 0.87109375,
239
+ "learning_rate": 3.371430118304538e-07,
240
+ "logits/chosen": -11.987521171569824,
241
+ "logits/rejected": -11.82154655456543,
242
+ "logps/chosen": -599.1915283203125,
243
+ "logps/rejected": -586.5440673828125,
244
+ "loss": 0.6945,
245
+ "rewards/accuracies": 0.4593749940395355,
246
+ "rewards/chosen": -0.0005397430504672229,
247
+ "rewards/margins": -0.001651047496125102,
248
+ "rewards/rejected": 0.0011113042710348964,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 0.4786836200448766,
253
+ "grad_norm": 2.078125,
254
+ "learning_rate": 3.121724717912138e-07,
255
+ "logits/chosen": -12.010518074035645,
256
+ "logits/rejected": -11.801678657531738,
257
+ "logps/chosen": -477.989013671875,
258
+ "logps/rejected": -461.06695556640625,
259
+ "loss": 0.6923,
260
+ "rewards/accuracies": 0.5,
261
+ "rewards/chosen": -0.002107062842696905,
262
+ "rewards/margins": 0.002797911176458001,
263
+ "rewards/rejected": -0.00490497425198555,
264
+ "step": 160
265
+ },
266
+ {
267
+ "epoch": 0.5086013462976814,
268
+ "grad_norm": 1.046875,
269
+ "learning_rate": 2.865207571406029e-07,
270
+ "logits/chosen": -12.087748527526855,
271
+ "logits/rejected": -11.823697090148926,
272
+ "logps/chosen": -565.2094116210938,
273
+ "logps/rejected": -535.0115966796875,
274
+ "loss": 0.6945,
275
+ "rewards/accuracies": 0.45625001192092896,
276
+ "rewards/chosen": -0.010714416392147541,
277
+ "rewards/margins": -0.0014880959643051028,
278
+ "rewards/rejected": -0.009226320311427116,
279
+ "step": 170
280
+ },
281
+ {
282
+ "epoch": 0.5385190725504861,
283
+ "grad_norm": 0.69140625,
284
+ "learning_rate": 2.6046891343229986e-07,
285
+ "logits/chosen": -12.05897331237793,
286
+ "logits/rejected": -11.826770782470703,
287
+ "logps/chosen": -639.4141845703125,
288
+ "logps/rejected": -625.544189453125,
289
+ "loss": 0.694,
290
+ "rewards/accuracies": 0.4703125059604645,
291
+ "rewards/chosen": -0.008839543908834457,
292
+ "rewards/margins": -0.0007707075565122068,
293
+ "rewards/rejected": -0.008068837225437164,
294
+ "step": 180
295
+ },
296
+ {
297
+ "epoch": 0.5684367988032909,
298
+ "grad_norm": 2.15625,
299
+ "learning_rate": 2.3430237011767164e-07,
300
+ "logits/chosen": -12.140950202941895,
301
+ "logits/rejected": -11.830968856811523,
302
+ "logps/chosen": -564.7281494140625,
303
+ "logps/rejected": -540.7762451171875,
304
+ "loss": 0.6931,
305
+ "rewards/accuracies": 0.484375,
306
+ "rewards/chosen": -0.00785023346543312,
307
+ "rewards/margins": 0.0012117780279368162,
308
+ "rewards/rejected": -0.00906201172620058,
309
+ "step": 190
310
+ },
311
+ {
312
+ "epoch": 0.5983545250560958,
313
+ "grad_norm": 0.64453125,
314
+ "learning_rate": 2.0830781332097445e-07,
315
+ "logits/chosen": -12.064250946044922,
316
+ "logits/rejected": -11.847702026367188,
317
+ "logps/chosen": -601.1182861328125,
318
+ "logps/rejected": -584.1577758789062,
319
+ "loss": 0.6932,
320
+ "rewards/accuracies": 0.46562498807907104,
321
+ "rewards/chosen": -0.0038043882232159376,
322
+ "rewards/margins": 0.0006765492144040763,
323
+ "rewards/rejected": -0.0044809365645051,
324
+ "step": 200
325
+ },
326
+ {
327
+ "epoch": 0.6282722513089005,
328
+ "grad_norm": 0.6796875,
329
+ "learning_rate": 1.8277004484618357e-07,
330
+ "logits/chosen": -12.159658432006836,
331
+ "logits/rejected": -11.93867015838623,
332
+ "logps/chosen": -570.8473510742188,
333
+ "logps/rejected": -558.4066162109375,
334
+ "loss": 0.6956,
335
+ "rewards/accuracies": 0.46562498807907104,
336
+ "rewards/chosen": -0.01011698879301548,
337
+ "rewards/margins": -0.003837681608274579,
338
+ "rewards/rejected": -0.006279306020587683,
339
+ "step": 210
340
+ },
341
+ {
342
+ "epoch": 0.6581899775617053,
343
+ "grad_norm": 0.90625,
344
+ "learning_rate": 1.579688618288305e-07,
345
+ "logits/chosen": -12.136404991149902,
346
+ "logits/rejected": -11.933469772338867,
347
+ "logps/chosen": -653.62841796875,
348
+ "logps/rejected": -629.8015747070312,
349
+ "loss": 0.6917,
350
+ "rewards/accuracies": 0.5062500238418579,
351
+ "rewards/chosen": 0.0027369544841349125,
352
+ "rewards/margins": 0.003812599228695035,
353
+ "rewards/rejected": -0.0010756452102214098,
354
+ "step": 220
355
+ },
356
+ {
357
+ "epoch": 0.6881077038145101,
358
+ "grad_norm": 1.046875,
359
+ "learning_rate": 1.341759912200346e-07,
360
+ "logits/chosen": -12.061933517456055,
361
+ "logits/rejected": -11.955540657043457,
362
+ "logps/chosen": -541.6209716796875,
363
+ "logps/rejected": -530.1668701171875,
364
+ "loss": 0.69,
365
+ "rewards/accuracies": 0.526562511920929,
366
+ "rewards/chosen": 0.006943908985704184,
367
+ "rewards/margins": 0.007641922682523727,
368
+ "rewards/rejected": -0.0006980125908739865,
369
+ "step": 230
370
+ },
371
+ {
372
+ "epoch": 0.7180254300673149,
373
+ "grad_norm": 2.078125,
374
+ "learning_rate": 1.11652112689164e-07,
375
+ "logits/chosen": -11.996115684509277,
376
+ "logits/rejected": -11.906721115112305,
377
+ "logps/chosen": -514.420166015625,
378
+ "logps/rejected": -510.46307373046875,
379
+ "loss": 0.6932,
380
+ "rewards/accuracies": 0.5062500238418579,
381
+ "rewards/chosen": 0.004082207567989826,
382
+ "rewards/margins": 0.0010683867149055004,
383
+ "rewards/rejected": 0.003013820853084326,
384
+ "step": 240
385
+ },
386
+ {
387
+ "epoch": 0.7479431563201197,
388
+ "grad_norm": 0.60546875,
389
+ "learning_rate": 9.064400256282755e-08,
390
+ "logits/chosen": -11.868894577026367,
391
+ "logits/rejected": -11.739983558654785,
392
+ "logps/chosen": -636.687744140625,
393
+ "logps/rejected": -622.6748657226562,
394
+ "loss": 0.6937,
395
+ "rewards/accuracies": 0.4984374940395355,
396
+ "rewards/chosen": 0.007722427137196064,
397
+ "rewards/margins": -8.60728334828309e-07,
398
+ "rewards/rejected": 0.007723286747932434,
399
+ "step": 250
400
+ },
401
+ {
402
+ "epoch": 0.7778608825729244,
403
+ "grad_norm": 0.625,
404
+ "learning_rate": 7.138183009179921e-08,
405
+ "logits/chosen": -11.95314884185791,
406
+ "logits/rejected": -11.800286293029785,
407
+ "logps/chosen": -516.5999755859375,
408
+ "logps/rejected": -502.53424072265625,
409
+ "loss": 0.6941,
410
+ "rewards/accuracies": 0.46875,
411
+ "rewards/chosen": 0.003324865596368909,
412
+ "rewards/margins": -0.0007609135354869068,
413
+ "rewards/rejected": 0.004085779655724764,
414
+ "step": 260
415
+ },
416
+ {
417
+ "epoch": 0.8077786088257293,
418
+ "grad_norm": 2.03125,
419
+ "learning_rate": 5.4076635668540065e-08,
420
+ "logits/chosen": -11.870991706848145,
421
+ "logits/rejected": -11.772178649902344,
422
+ "logps/chosen": -537.8231201171875,
423
+ "logps/rejected": -536.79296875,
424
+ "loss": 0.6933,
425
+ "rewards/accuracies": 0.48124998807907104,
426
+ "rewards/chosen": -0.0003947736695408821,
427
+ "rewards/margins": 0.0007606123690493405,
428
+ "rewards/rejected": -0.0011553869117051363,
429
+ "step": 270
430
+ },
431
+ {
432
+ "epoch": 0.837696335078534,
433
+ "grad_norm": 0.59765625,
434
+ "learning_rate": 3.8918018624496286e-08,
435
+ "logits/chosen": -12.036161422729492,
436
+ "logits/rejected": -11.860921859741211,
437
+ "logps/chosen": -554.2711791992188,
438
+ "logps/rejected": -541.3438720703125,
439
+ "loss": 0.6953,
440
+ "rewards/accuracies": 0.4781250059604645,
441
+ "rewards/chosen": 0.00031470804242417216,
442
+ "rewards/margins": -0.0031545311212539673,
443
+ "rewards/rejected": 0.0034692403860390186,
444
+ "step": 280
445
+ },
446
+ {
447
+ "epoch": 0.8676140613313388,
448
+ "grad_norm": 2.578125,
449
+ "learning_rate": 2.6072059940146772e-08,
450
+ "logits/chosen": -12.018091201782227,
451
+ "logits/rejected": -11.894245147705078,
452
+ "logps/chosen": -569.8294677734375,
453
+ "logps/rejected": -563.2034912109375,
454
+ "loss": 0.6936,
455
+ "rewards/accuracies": 0.4921875,
456
+ "rewards/chosen": 0.004260566085577011,
457
+ "rewards/margins": 0.0009015941177494824,
458
+ "rewards/rejected": 0.003358972491696477,
459
+ "step": 290
460
+ },
461
+ {
462
+ "epoch": 0.8975317875841436,
463
+ "grad_norm": 1.015625,
464
+ "learning_rate": 1.5679502627027136e-08,
465
+ "logits/chosen": -12.114303588867188,
466
+ "logits/rejected": -11.98996639251709,
467
+ "logps/chosen": -567.474853515625,
468
+ "logps/rejected": -552.4931640625,
469
+ "loss": 0.6943,
470
+ "rewards/accuracies": 0.46406251192092896,
471
+ "rewards/chosen": 0.0020927421282976866,
472
+ "rewards/margins": -0.0002106330794049427,
473
+ "rewards/rejected": 0.0023033744655549526,
474
+ "step": 300
475
+ },
476
+ {
477
+ "epoch": 0.8975317875841436,
478
+ "eval_logits/chosen": -12.036696434020996,
479
+ "eval_logits/rejected": -11.858353614807129,
480
+ "eval_logps/chosen": -579.9698486328125,
481
+ "eval_logps/rejected": -567.8892822265625,
482
+ "eval_loss": 0.6939929127693176,
483
+ "eval_rewards/accuracies": 0.4747757911682129,
484
+ "eval_rewards/chosen": 0.0061609940603375435,
485
+ "eval_rewards/margins": -0.00024100362497847527,
486
+ "eval_rewards/rejected": 0.006401997059583664,
487
+ "eval_runtime": 2311.0773,
488
+ "eval_samples_per_second": 2.314,
489
+ "eval_steps_per_second": 0.289,
490
+ "step": 300
491
+ },
492
+ {
493
+ "epoch": 0.9274495138369484,
494
+ "grad_norm": 0.7890625,
495
+ "learning_rate": 7.85420971784223e-09,
496
+ "logits/chosen": -12.027294158935547,
497
+ "logits/rejected": -11.775075912475586,
498
+ "logps/chosen": -637.2244262695312,
499
+ "logps/rejected": -614.4982299804688,
500
+ "loss": 0.6929,
501
+ "rewards/accuracies": 0.49687498807907104,
502
+ "rewards/chosen": 0.008133328519761562,
503
+ "rewards/margins": 0.0024830640759319067,
504
+ "rewards/rejected": 0.005650263279676437,
505
+ "step": 310
506
+ },
507
+ {
508
+ "epoch": 0.9573672400897532,
509
+ "grad_norm": 0.99609375,
510
+ "learning_rate": 2.6819167592529168e-09,
511
+ "logits/chosen": -11.920928955078125,
512
+ "logits/rejected": -11.692464828491211,
513
+ "logps/chosen": -622.5579223632812,
514
+ "logps/rejected": -609.9359130859375,
515
+ "loss": 0.693,
516
+ "rewards/accuracies": 0.48906248807907104,
517
+ "rewards/chosen": 0.012452816590666771,
518
+ "rewards/margins": 0.0013520055217668414,
519
+ "rewards/rejected": 0.011100810021162033,
520
+ "step": 320
521
+ },
522
+ {
523
+ "epoch": 0.9872849663425579,
524
+ "grad_norm": 1.734375,
525
+ "learning_rate": 2.1929247528540418e-10,
526
+ "logits/chosen": -12.089780807495117,
527
+ "logits/rejected": -11.94688606262207,
528
+ "logps/chosen": -568.0908203125,
529
+ "logps/rejected": -560.9391479492188,
530
+ "loss": 0.695,
531
+ "rewards/accuracies": 0.512499988079071,
532
+ "rewards/chosen": 0.013248354196548462,
533
+ "rewards/margins": -0.0018901375588029623,
534
+ "rewards/rejected": 0.015138491988182068,
535
+ "step": 330
536
+ },
537
+ {
538
+ "epoch": 0.9992520568436799,
539
+ "step": 334,
540
+ "total_flos": 0.0,
541
+ "train_loss": 0.6938572500994106,
542
+ "train_runtime": 20825.8975,
543
+ "train_samples_per_second": 1.027,
544
+ "train_steps_per_second": 0.016
545
+ }
546
+ ],
547
+ "logging_steps": 10,
548
+ "max_steps": 334,
549
+ "num_input_tokens_seen": 0,
550
+ "num_train_epochs": 1,
551
+ "save_steps": 300,
552
+ "total_flos": 0.0,
553
+ "train_batch_size": 8,
554
+ "trial_name": null,
555
+ "trial_params": null
556
+ }