lole25 committed on
Commit 0b4ce1a
Parent(s): 48c5027

Model save

README.md ADDED
@@ -0,0 +1,62 @@
+ ---
+ license: apache-2.0
+ library_name: peft
+ tags:
+ - trl
+ - dpo
+ - generated_from_trainer
+ base_model: DUAL-GPO/phi-2-gpo-new-i0
+ model-index:
+ - name: phi-2-gpo-v17-i1
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # phi-2-gpo-v17-i1
+
+ This model is a fine-tuned version of [DUAL-GPO/phi-2-gpo-new-i0](https://huggingface.co/DUAL-GPO/phi-2-gpo-new-i0) on the None dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-06
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 2
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 32
+ - total_eval_batch_size: 8
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 1
+
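A minimal sketch of how these values could map onto `transformers.TrainingArguments` for a TRL DPO run; this is an illustration, not the authors' training script. `output_dir` is an assumed placeholder, `logging_steps`/`save_steps` are taken from the `trainer_state.json` shown further down, and anything not listed above is left at library defaults.

```python
# Hedged sketch: maps the hyperparameters listed above onto TrainingArguments.
# Not the authors' script; output_dir is an assumed placeholder.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="phi-2-gpo-v17-i1",   # assumed
    learning_rate=5e-6,
    per_device_train_batch_size=4,   # train_batch_size: 4
    per_device_eval_batch_size=4,    # eval_batch_size: 4
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,                # lr_scheduler_warmup_ratio: 0.1
    seed=42,
    logging_steps=10,                # from trainer_state.json
    save_steps=100,                  # from trainer_state.json
)
# Across 2 GPUs with gradient accumulation, the effective batch size is
# 4 (per device) * 4 (accumulation) * 2 (devices) = 32, matching total_train_batch_size.
```

The Adam settings in the list (betas 0.9/0.999, epsilon 1e-08) are the library defaults, so they need no explicit arguments.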
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - PEFT 0.7.1
+ - Transformers 4.36.2
+ - Pytorch 2.1.2+cu121
+ - Datasets 2.14.6
+ - Tokenizers 0.15.2
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:00dcda0e765a9b56cedd96d09e920d2c58130520e911e5e398f27925739ea356
+ oid sha256:4bfd291f7e2c26d83d180708d8587922ae2c88f381f35820bfb0e58d76dfe9fd
  size 167807296
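The changed `adapter_model.safetensors` holds only the PEFT adapter weights, so inference applies it on top of the base model named in the card. A minimal sketch, assuming this repo is published as `DUAL-GPO/phi-2-gpo-v17-i1` (the repo id is inferred from the model name, not stated in the diff) and that the base repo resolves to full model weights:

```python
# Hedged inference sketch: loads this repo's adapter_model.safetensors on top of
# the base model from the card. The adapter repo id below is an assumption.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "DUAL-GPO/phi-2-gpo-new-i0"      # base_model from the card
adapter_id = "DUAL-GPO/phi-2-gpo-v17-i1"   # assumed Hub id for this repo

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, adapter_id)  # reads adapter_model.safetensors

prompt = "Explain direct preference optimization in one sentence."
inputs = tokenizer(prompt, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```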
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 1.0,
+     "train_loss": 0.2399272450015076,
+     "train_runtime": 4417.3525,
+     "train_samples": 15000,
+     "train_samples_per_second": 3.396,
+     "train_steps_per_second": 0.106
+ }
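The summary figures are internally consistent; a quick sanity sketch (not a file in this commit) that recomputes them from `train_samples`, `train_runtime`, and the step count recorded in `trainer_state.json`:

```python
# Sanity sketch: recomputes the reported throughput from the other fields.
import json

results = json.load(open("all_results.json"))

samples_per_sec = results["train_samples"] / results["train_runtime"]
print(round(samples_per_sec, 3))   # 3.396 -> matches train_samples_per_second

steps = 468                        # global_step from trainer_state.json
print(round(steps / results["train_runtime"], 3))  # 0.106 -> matches train_steps_per_second

# 468 steps * total_train_batch_size 32 = 14976 samples, i.e. 14976 / 15000 = 0.9984,
# which is the final "epoch" value recorded in trainer_state.json.
```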
runs/May14_01-46-16_gpu4-119-5/events.out.tfevents.1715615284.gpu4-119-5.874343.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:90516ba11f1ac172f767d4c7cdf4a6be90f7b59ee3ebc6293b99d46f826edeeb
- size 23825
+ oid sha256:bc5db9f128c54a07c8751a4a4da422ebaf144f4664661894f518e5ec34385714
+ size 27983
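The updated event file is a standard TensorBoard log. A hedged sketch for inspecting it locally; the scalar tag names are assumptions (the trainer typically logs the same keys that appear in `trainer_state.json` below), so list the available tags first:

```python
# Hedged sketch: reads the TensorBoard event file committed above.
# The run directory is taken from the file path; tag names are assumptions.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/May14_01-46-16_gpu4-119-5")
acc.Reload()

print(acc.Tags()["scalars"])              # see which scalar tags were actually written
for event in acc.Scalars("train/loss"):   # assumed tag name
    print(event.step, event.value)
```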
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 1.0,
+     "train_loss": 0.2399272450015076,
+     "train_runtime": 4417.3525,
+     "train_samples": 15000,
+     "train_samples_per_second": 3.396,
+     "train_steps_per_second": 0.106
+ }
trainer_state.json ADDED
@@ -0,0 +1,688 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9984,
5
+ "eval_steps": 500,
6
+ "global_step": 468,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 1.0638297872340426e-07,
14
+ "logits/chosen": 0.18604117631912231,
15
+ "logits/rejected": 0.34631967544555664,
16
+ "logps/chosen": -460.0769348144531,
17
+ "logps/rejected": -351.57135009765625,
18
+ "loss": 0.3612,
19
+ "rewards/accuracies": 0.0,
20
+ "rewards/chosen": 0.0,
21
+ "rewards/margins": 0.0,
22
+ "rewards/rejected": 0.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.02,
27
+ "learning_rate": 1.0638297872340427e-06,
28
+ "logits/chosen": 0.05747946724295616,
29
+ "logits/rejected": 0.16539901494979858,
30
+ "logps/chosen": -330.9287414550781,
31
+ "logps/rejected": -328.71575927734375,
32
+ "loss": 0.3819,
33
+ "rewards/accuracies": 0.3888888955116272,
34
+ "rewards/chosen": 2.3913149561849423e-05,
35
+ "rewards/margins": -1.7764228687155992e-05,
36
+ "rewards/rejected": 4.167737643001601e-05,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.04,
41
+ "learning_rate": 2.1276595744680853e-06,
42
+ "logits/chosen": 0.15373219549655914,
43
+ "logits/rejected": 0.19493858516216278,
44
+ "logps/chosen": -325.29803466796875,
45
+ "logps/rejected": -315.1011047363281,
46
+ "loss": 0.3718,
47
+ "rewards/accuracies": 0.4312500059604645,
48
+ "rewards/chosen": -1.974666338355746e-05,
49
+ "rewards/margins": 1.2878153654583002e-07,
50
+ "rewards/rejected": -1.9875456928275526e-05,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.06,
55
+ "learning_rate": 3.191489361702128e-06,
56
+ "logits/chosen": 0.11452794075012207,
57
+ "logits/rejected": 0.1842522919178009,
58
+ "logps/chosen": -371.60943603515625,
59
+ "logps/rejected": -327.19366455078125,
60
+ "loss": 0.3676,
61
+ "rewards/accuracies": 0.4937500059604645,
62
+ "rewards/chosen": -5.7096302043646574e-05,
63
+ "rewards/margins": 3.7131747376406565e-05,
64
+ "rewards/rejected": -9.422805305803195e-05,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.09,
69
+ "learning_rate": 4.255319148936171e-06,
70
+ "logits/chosen": 0.18893569707870483,
71
+ "logits/rejected": 0.2084759920835495,
72
+ "logps/chosen": -337.7607727050781,
73
+ "logps/rejected": -353.72503662109375,
74
+ "loss": 0.3678,
75
+ "rewards/accuracies": 0.5062500238418579,
76
+ "rewards/chosen": 0.00010834180284291506,
77
+ "rewards/margins": 0.00026063303812406957,
78
+ "rewards/rejected": -0.0001522912352811545,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.11,
83
+ "learning_rate": 4.999373573764188e-06,
84
+ "logits/chosen": 0.08207504451274872,
85
+ "logits/rejected": 0.20786412060260773,
86
+ "logps/chosen": -384.91656494140625,
87
+ "logps/rejected": -347.6080627441406,
88
+ "loss": 0.3741,
89
+ "rewards/accuracies": 0.625,
90
+ "rewards/chosen": 0.0010863704374060035,
91
+ "rewards/margins": 0.0014893051702529192,
92
+ "rewards/rejected": -0.0004029346746392548,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.13,
97
+ "learning_rate": 4.988245838331339e-06,
98
+ "logits/chosen": 0.13469652831554413,
99
+ "logits/rejected": 0.17153413593769073,
100
+ "logps/chosen": -372.34124755859375,
101
+ "logps/rejected": -329.98626708984375,
102
+ "loss": 0.3646,
103
+ "rewards/accuracies": 0.6499999761581421,
104
+ "rewards/chosen": 0.0023965700529515743,
105
+ "rewards/margins": 0.0026933744084089994,
106
+ "rewards/rejected": -0.0002968042972497642,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.15,
111
+ "learning_rate": 4.963268819535228e-06,
112
+ "logits/chosen": 0.15689334273338318,
113
+ "logits/rejected": 0.20978419482707977,
114
+ "logps/chosen": -364.29888916015625,
115
+ "logps/rejected": -377.21087646484375,
116
+ "loss": 0.3705,
117
+ "rewards/accuracies": 0.71875,
118
+ "rewards/chosen": 0.0023818810004740953,
119
+ "rewards/margins": 0.0072565278969705105,
120
+ "rewards/rejected": -0.0048746466636657715,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.17,
125
+ "learning_rate": 4.9245815365216115e-06,
126
+ "logits/chosen": 0.11595060676336288,
127
+ "logits/rejected": 0.18608702719211578,
128
+ "logps/chosen": -328.6256103515625,
129
+ "logps/rejected": -365.93896484375,
130
+ "loss": 0.3663,
131
+ "rewards/accuracies": 0.6812499761581421,
132
+ "rewards/chosen": -0.002104334533214569,
133
+ "rewards/margins": 0.011472588405013084,
134
+ "rewards/rejected": -0.013576922006905079,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.19,
139
+ "learning_rate": 4.872399318152594e-06,
140
+ "logits/chosen": 0.08527339994907379,
141
+ "logits/rejected": 0.11540959030389786,
142
+ "logps/chosen": -352.0487060546875,
143
+ "logps/rejected": -361.36578369140625,
144
+ "loss": 0.3456,
145
+ "rewards/accuracies": 0.675000011920929,
146
+ "rewards/chosen": -0.011672710999846458,
147
+ "rewards/margins": 0.01901327446103096,
148
+ "rewards/rejected": -0.03068598173558712,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.21,
153
+ "learning_rate": 4.807012604511542e-06,
154
+ "logits/chosen": 0.14592930674552917,
155
+ "logits/rejected": 0.18039533495903015,
156
+ "logps/chosen": -414.8818359375,
157
+ "logps/rejected": -435.3639221191406,
158
+ "loss": 0.34,
159
+ "rewards/accuracies": 0.6625000238418579,
160
+ "rewards/chosen": -0.04601982235908508,
161
+ "rewards/margins": 0.034013133496046066,
162
+ "rewards/rejected": -0.08003295958042145,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.23,
167
+ "learning_rate": 4.728785330347771e-06,
168
+ "logits/chosen": 0.13395074009895325,
169
+ "logits/rejected": 0.2120208740234375,
170
+ "logps/chosen": -432.98486328125,
171
+ "logps/rejected": -553.6598510742188,
172
+ "loss": 0.305,
173
+ "rewards/accuracies": 0.6875,
174
+ "rewards/chosen": -0.11783289909362793,
175
+ "rewards/margins": 0.08183668553829193,
176
+ "rewards/rejected": -0.19966959953308105,
177
+ "step": 110
178
+ },
179
+ {
180
+ "epoch": 0.26,
181
+ "learning_rate": 4.63815289945858e-06,
182
+ "logits/chosen": 0.14807412028312683,
183
+ "logits/rejected": 0.21360798180103302,
184
+ "logps/chosen": -565.0419311523438,
185
+ "logps/rejected": -595.3816528320312,
186
+ "loss": 0.3434,
187
+ "rewards/accuracies": 0.612500011920929,
188
+ "rewards/chosen": -0.18253007531166077,
189
+ "rewards/margins": 0.060289014130830765,
190
+ "rewards/rejected": -0.24281907081604004,
191
+ "step": 120
192
+ },
193
+ {
194
+ "epoch": 0.28,
195
+ "learning_rate": 4.535619761282989e-06,
196
+ "logits/chosen": 0.10818658024072647,
197
+ "logits/rejected": 0.18194182217121124,
198
+ "logps/chosen": -462.3949279785156,
199
+ "logps/rejected": -513.8490600585938,
200
+ "loss": 0.3173,
201
+ "rewards/accuracies": 0.6625000238418579,
202
+ "rewards/chosen": -0.1326686441898346,
203
+ "rewards/margins": 0.0804656594991684,
204
+ "rewards/rejected": -0.2131342887878418,
205
+ "step": 130
206
+ },
207
+ {
208
+ "epoch": 0.3,
209
+ "learning_rate": 4.42175660319555e-06,
210
+ "logits/chosen": 0.13493295013904572,
211
+ "logits/rejected": 0.22737479209899902,
212
+ "logps/chosen": -532.4095458984375,
213
+ "logps/rejected": -585.3455200195312,
214
+ "loss": 0.3196,
215
+ "rewards/accuracies": 0.6499999761581421,
216
+ "rewards/chosen": -0.16629299521446228,
217
+ "rewards/margins": 0.08414360880851746,
218
+ "rewards/rejected": -0.25043657422065735,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 0.32,
223
+ "learning_rate": 4.297197174127619e-06,
224
+ "logits/chosen": 0.17478415369987488,
225
+ "logits/rejected": 0.24990789592266083,
226
+ "logps/chosen": -479.77862548828125,
227
+ "logps/rejected": -553.7377319335938,
228
+ "loss": 0.3207,
229
+ "rewards/accuracies": 0.6187499761581421,
230
+ "rewards/chosen": -0.14474426209926605,
231
+ "rewards/margins": 0.09155549854040146,
232
+ "rewards/rejected": -0.2362997829914093,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 0.34,
237
+ "learning_rate": 4.162634757195418e-06,
238
+ "logits/chosen": 0.13402113318443298,
239
+ "logits/rejected": 0.2551622688770294,
240
+ "logps/chosen": -496.40081787109375,
241
+ "logps/rejected": -558.84326171875,
242
+ "loss": 0.3187,
243
+ "rewards/accuracies": 0.65625,
244
+ "rewards/chosen": -0.13809171319007874,
245
+ "rewards/margins": 0.08086591213941574,
246
+ "rewards/rejected": -0.21895763278007507,
247
+ "step": 160
248
+ },
249
+ {
250
+ "epoch": 0.36,
251
+ "learning_rate": 4.018818310967843e-06,
252
+ "logits/chosen": 0.12252243608236313,
253
+ "logits/rejected": 0.16481925547122955,
254
+ "logps/chosen": -484.56353759765625,
255
+ "logps/rejected": -554.2274780273438,
256
+ "loss": 0.3255,
257
+ "rewards/accuracies": 0.6625000238418579,
258
+ "rewards/chosen": -0.14209917187690735,
259
+ "rewards/margins": 0.09574152529239655,
260
+ "rewards/rejected": -0.2378406971693039,
261
+ "step": 170
262
+ },
263
+ {
264
+ "epoch": 0.38,
265
+ "learning_rate": 3.866548300851254e-06,
266
+ "logits/chosen": 0.08216498792171478,
267
+ "logits/rejected": 0.17952165007591248,
268
+ "logps/chosen": -482.576171875,
269
+ "logps/rejected": -579.781982421875,
270
+ "loss": 0.3047,
271
+ "rewards/accuracies": 0.637499988079071,
272
+ "rewards/chosen": -0.14626096189022064,
273
+ "rewards/margins": 0.08570893108844757,
274
+ "rewards/rejected": -0.23196987807750702,
275
+ "step": 180
276
+ },
277
+ {
278
+ "epoch": 0.41,
279
+ "learning_rate": 3.706672243793271e-06,
280
+ "logits/chosen": 0.07855963706970215,
281
+ "logits/rejected": 0.07844971120357513,
282
+ "logps/chosen": -468.69061279296875,
283
+ "logps/rejected": -544.0549926757812,
284
+ "loss": 0.2935,
285
+ "rewards/accuracies": 0.6875,
286
+ "rewards/chosen": -0.1354297697544098,
287
+ "rewards/margins": 0.09144213050603867,
288
+ "rewards/rejected": -0.22687189280986786,
289
+ "step": 190
290
+ },
291
+ {
292
+ "epoch": 0.43,
293
+ "learning_rate": 3.5400799911032357e-06,
294
+ "logits/chosen": 0.10545216500759125,
295
+ "logits/rejected": 0.1882828176021576,
296
+ "logps/chosen": -501.1815490722656,
297
+ "logps/rejected": -601.7250366210938,
298
+ "loss": 0.3041,
299
+ "rewards/accuracies": 0.6312500238418579,
300
+ "rewards/chosen": -0.15607957541942596,
301
+ "rewards/margins": 0.10436417162418365,
302
+ "rewards/rejected": -0.2604437470436096,
303
+ "step": 200
304
+ },
305
+ {
306
+ "epoch": 0.45,
307
+ "learning_rate": 3.3676987756445894e-06,
308
+ "logits/chosen": 0.10487540811300278,
309
+ "logits/rejected": 0.11818546056747437,
310
+ "logps/chosen": -470.6344299316406,
311
+ "logps/rejected": -565.8145751953125,
312
+ "loss": 0.3148,
313
+ "rewards/accuracies": 0.612500011920929,
314
+ "rewards/chosen": -0.16682696342468262,
315
+ "rewards/margins": 0.09452913701534271,
316
+ "rewards/rejected": -0.26135605573654175,
317
+ "step": 210
318
+ },
319
+ {
320
+ "epoch": 0.47,
321
+ "learning_rate": 3.1904880509659397e-06,
322
+ "logits/chosen": 0.13482534885406494,
323
+ "logits/rejected": 0.20024776458740234,
324
+ "logps/chosen": -510.10528564453125,
325
+ "logps/rejected": -605.6468505859375,
326
+ "loss": 0.3172,
327
+ "rewards/accuracies": 0.6812499761581421,
328
+ "rewards/chosen": -0.16119703650474548,
329
+ "rewards/margins": 0.1006912812590599,
330
+ "rewards/rejected": -0.261888325214386,
331
+ "step": 220
332
+ },
333
+ {
334
+ "epoch": 0.49,
335
+ "learning_rate": 3.0094341510955697e-06,
336
+ "logits/chosen": 0.10005593299865723,
337
+ "logits/rejected": 0.1791614145040512,
338
+ "logps/chosen": -532.1925048828125,
339
+ "logps/rejected": -624.0726318359375,
340
+ "loss": 0.3106,
341
+ "rewards/accuracies": 0.668749988079071,
342
+ "rewards/chosen": -0.17168700695037842,
343
+ "rewards/margins": 0.09216924011707306,
344
+ "rewards/rejected": -0.26385626196861267,
345
+ "step": 230
346
+ },
347
+ {
348
+ "epoch": 0.51,
349
+ "learning_rate": 2.825544800722376e-06,
350
+ "logits/chosen": 0.10918021202087402,
351
+ "logits/rejected": 0.18382051587104797,
352
+ "logps/chosen": -508.88494873046875,
353
+ "logps/rejected": -571.0011596679688,
354
+ "loss": 0.309,
355
+ "rewards/accuracies": 0.637499988079071,
356
+ "rewards/chosen": -0.15795882046222687,
357
+ "rewards/margins": 0.09128745645284653,
358
+ "rewards/rejected": -0.2492462694644928,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 0.53,
363
+ "learning_rate": 2.639843506318899e-06,
364
+ "logits/chosen": 0.12444597482681274,
365
+ "logits/rejected": 0.16049379110336304,
366
+ "logps/chosen": -509.65631103515625,
367
+ "logps/rejected": -609.0398559570312,
368
+ "loss": 0.2924,
369
+ "rewards/accuracies": 0.6937500238418579,
370
+ "rewards/chosen": -0.1560695320367813,
371
+ "rewards/margins": 0.11202052980661392,
372
+ "rewards/rejected": -0.26809003949165344,
373
+ "step": 250
374
+ },
375
+ {
376
+ "epoch": 0.55,
377
+ "learning_rate": 2.4533638594248094e-06,
378
+ "logits/chosen": 0.11506851017475128,
379
+ "logits/rejected": 0.1052849292755127,
380
+ "logps/chosen": -535.1851806640625,
381
+ "logps/rejected": -609.4058837890625,
382
+ "loss": 0.3139,
383
+ "rewards/accuracies": 0.699999988079071,
384
+ "rewards/chosen": -0.18579542636871338,
385
+ "rewards/margins": 0.10177616029977798,
386
+ "rewards/rejected": -0.28757157921791077,
387
+ "step": 260
388
+ },
389
+ {
390
+ "epoch": 0.58,
391
+ "learning_rate": 2.2671437837980943e-06,
392
+ "logits/chosen": 0.11827238649129868,
393
+ "logits/rejected": 0.12189098447561264,
394
+ "logps/chosen": -542.7433471679688,
395
+ "logps/rejected": -633.5802612304688,
396
+ "loss": 0.2867,
397
+ "rewards/accuracies": 0.606249988079071,
398
+ "rewards/chosen": -0.19100908935070038,
399
+ "rewards/margins": 0.09670811891555786,
400
+ "rewards/rejected": -0.28771719336509705,
401
+ "step": 270
402
+ },
403
+ {
404
+ "epoch": 0.6,
405
+ "learning_rate": 2.082219758453629e-06,
406
+ "logits/chosen": 0.0904841274023056,
407
+ "logits/rejected": 0.20232203602790833,
408
+ "logps/chosen": -575.5374755859375,
409
+ "logps/rejected": -672.4290161132812,
410
+ "loss": 0.3105,
411
+ "rewards/accuracies": 0.6812499761581421,
412
+ "rewards/chosen": -0.2171885222196579,
413
+ "rewards/margins": 0.10769243538379669,
414
+ "rewards/rejected": -0.3248809278011322,
415
+ "step": 280
416
+ },
417
+ {
418
+ "epoch": 0.62,
419
+ "learning_rate": 1.899621048743019e-06,
420
+ "logits/chosen": 0.07801838964223862,
421
+ "logits/rejected": 0.16570156812667847,
422
+ "logps/chosen": -556.6237182617188,
423
+ "logps/rejected": -643.4578857421875,
424
+ "loss": 0.3145,
425
+ "rewards/accuracies": 0.6937500238418579,
426
+ "rewards/chosen": -0.20063337683677673,
427
+ "rewards/margins": 0.1107235923409462,
428
+ "rewards/rejected": -0.31135696172714233,
429
+ "step": 290
430
+ },
431
+ {
432
+ "epoch": 0.64,
433
+ "learning_rate": 1.7203639775848423e-06,
434
+ "logits/chosen": 0.07458348572254181,
435
+ "logits/rejected": 0.08251482248306274,
436
+ "logps/chosen": -524.4200439453125,
437
+ "logps/rejected": -666.1168823242188,
438
+ "loss": 0.2934,
439
+ "rewards/accuracies": 0.668749988079071,
440
+ "rewards/chosen": -0.18233875930309296,
441
+ "rewards/margins": 0.13505366444587708,
442
+ "rewards/rejected": -0.3173924386501312,
443
+ "step": 300
444
+ },
445
+ {
446
+ "epoch": 0.66,
447
+ "learning_rate": 1.5454462687309445e-06,
448
+ "logits/chosen": 0.06379405409097672,
449
+ "logits/rejected": 0.14568018913269043,
450
+ "logps/chosen": -504.7530822753906,
451
+ "logps/rejected": -650.941650390625,
452
+ "loss": 0.2814,
453
+ "rewards/accuracies": 0.675000011920929,
454
+ "rewards/chosen": -0.17345736920833588,
455
+ "rewards/margins": 0.1400579959154129,
456
+ "rewards/rejected": -0.3135153353214264,
457
+ "step": 310
458
+ },
459
+ {
460
+ "epoch": 0.68,
461
+ "learning_rate": 1.3758414935535147e-06,
462
+ "logits/chosen": 0.0773477703332901,
463
+ "logits/rejected": 0.12101063877344131,
464
+ "logps/chosen": -536.0679931640625,
465
+ "logps/rejected": -659.2662353515625,
466
+ "loss": 0.3014,
467
+ "rewards/accuracies": 0.71875,
468
+ "rewards/chosen": -0.18032298982143402,
469
+ "rewards/margins": 0.11869337409734726,
470
+ "rewards/rejected": -0.29901641607284546,
471
+ "step": 320
472
+ },
473
+ {
474
+ "epoch": 0.7,
475
+ "learning_rate": 1.2124936522614622e-06,
476
+ "logits/chosen": 0.09862785786390305,
477
+ "logits/rejected": 0.14109982550144196,
478
+ "logps/chosen": -487.79071044921875,
479
+ "logps/rejected": -610.7267456054688,
480
+ "loss": 0.2935,
481
+ "rewards/accuracies": 0.65625,
482
+ "rewards/chosen": -0.18224193155765533,
483
+ "rewards/margins": 0.10070188343524933,
484
+ "rewards/rejected": -0.2829437851905823,
485
+ "step": 330
486
+ },
487
+ {
488
+ "epoch": 0.73,
489
+ "learning_rate": 1.0563119197063934e-06,
490
+ "logits/chosen": 0.09601452201604843,
491
+ "logits/rejected": 0.13355228304862976,
492
+ "logps/chosen": -478.78143310546875,
493
+ "logps/rejected": -622.6370239257812,
494
+ "loss": 0.31,
495
+ "rewards/accuracies": 0.668749988079071,
496
+ "rewards/chosen": -0.17518463730812073,
497
+ "rewards/margins": 0.11871640384197235,
498
+ "rewards/rejected": -0.2939010262489319,
499
+ "step": 340
500
+ },
501
+ {
502
+ "epoch": 0.75,
503
+ "learning_rate": 9.081655850224449e-07,
504
+ "logits/chosen": 0.14273716509342194,
505
+ "logits/rejected": 0.12122112512588501,
506
+ "logps/chosen": -535.0431518554688,
507
+ "logps/rejected": -643.0104370117188,
508
+ "loss": 0.3196,
509
+ "rewards/accuracies": 0.675000011920929,
510
+ "rewards/chosen": -0.19643327593803406,
511
+ "rewards/margins": 0.10987784713506699,
512
+ "rewards/rejected": -0.30631113052368164,
513
+ "step": 350
514
+ },
515
+ {
516
+ "epoch": 0.77,
517
+ "learning_rate": 7.688792132653111e-07,
518
+ "logits/chosen": 0.1597367525100708,
519
+ "logits/rejected": 0.18071278929710388,
520
+ "logps/chosen": -597.0608520507812,
521
+ "logps/rejected": -692.2689208984375,
522
+ "loss": 0.2965,
523
+ "rewards/accuracies": 0.699999988079071,
524
+ "rewards/chosen": -0.20307877659797668,
525
+ "rewards/margins": 0.12918424606323242,
526
+ "rewards/rejected": -0.3322630524635315,
527
+ "step": 360
528
+ },
529
+ {
530
+ "epoch": 0.79,
531
+ "learning_rate": 6.392280559802341e-07,
532
+ "logits/chosen": 0.1371072232723236,
533
+ "logits/rejected": 0.16206106543540955,
534
+ "logps/chosen": -573.9052124023438,
535
+ "logps/rejected": -700.1799926757812,
536
+ "loss": 0.2689,
537
+ "rewards/accuracies": 0.706250011920929,
538
+ "rewards/chosen": -0.2099401205778122,
539
+ "rewards/margins": 0.13856378197669983,
540
+ "rewards/rejected": -0.34850388765335083,
541
+ "step": 370
542
+ },
543
+ {
544
+ "epoch": 0.81,
545
+ "learning_rate": 5.199337362431792e-07,
546
+ "logits/chosen": 0.10267746448516846,
547
+ "logits/rejected": 0.1483933925628662,
548
+ "logps/chosen": -554.7241821289062,
549
+ "logps/rejected": -637.0789184570312,
550
+ "loss": 0.2999,
551
+ "rewards/accuracies": 0.71875,
552
+ "rewards/chosen": -0.19773730635643005,
553
+ "rewards/margins": 0.11398313194513321,
554
+ "rewards/rejected": -0.31172046065330505,
555
+ "step": 380
556
+ },
557
+ {
558
+ "epoch": 0.83,
559
+ "learning_rate": 4.1166023219176176e-07,
560
+ "logits/chosen": 0.11247100681066513,
561
+ "logits/rejected": 0.1309679001569748,
562
+ "logps/chosen": -557.1490478515625,
563
+ "logps/rejected": -678.8016357421875,
564
+ "loss": 0.2945,
565
+ "rewards/accuracies": 0.6875,
566
+ "rewards/chosen": -0.1877584308385849,
567
+ "rewards/margins": 0.12902548909187317,
568
+ "rewards/rejected": -0.3167839050292969,
569
+ "step": 390
570
+ },
571
+ {
572
+ "epoch": 0.85,
573
+ "learning_rate": 3.150101814011136e-07,
574
+ "logits/chosen": 0.16857033967971802,
575
+ "logits/rejected": 0.18106935918331146,
576
+ "logps/chosen": -571.0828857421875,
577
+ "logps/rejected": -638.4654541015625,
578
+ "loss": 0.3045,
579
+ "rewards/accuracies": 0.668749988079071,
580
+ "rewards/chosen": -0.19992712140083313,
581
+ "rewards/margins": 0.10413169860839844,
582
+ "rewards/rejected": -0.30405884981155396,
583
+ "step": 400
584
+ },
585
+ {
586
+ "epoch": 0.87,
587
+ "learning_rate": 2.3052152667409289e-07,
588
+ "logits/chosen": 0.09823437035083771,
589
+ "logits/rejected": 0.23346427083015442,
590
+ "logps/chosen": -537.1575317382812,
591
+ "logps/rejected": -657.79931640625,
592
+ "loss": 0.3101,
593
+ "rewards/accuracies": 0.637499988079071,
594
+ "rewards/chosen": -0.19983352720737457,
595
+ "rewards/margins": 0.11520209163427353,
596
+ "rewards/rejected": -0.3150356113910675,
597
+ "step": 410
598
+ },
599
+ {
600
+ "epoch": 0.9,
601
+ "learning_rate": 1.5866452191498488e-07,
602
+ "logits/chosen": 0.13151055574417114,
603
+ "logits/rejected": 0.17384907603263855,
604
+ "logps/chosen": -562.8436279296875,
605
+ "logps/rejected": -688.2677612304688,
606
+ "loss": 0.2892,
607
+ "rewards/accuracies": 0.6499999761581421,
608
+ "rewards/chosen": -0.21342253684997559,
609
+ "rewards/margins": 0.1261134147644043,
610
+ "rewards/rejected": -0.3395359516143799,
611
+ "step": 420
612
+ },
613
+ {
614
+ "epoch": 0.92,
615
+ "learning_rate": 9.983911475163727e-08,
616
+ "logits/chosen": 0.11608059704303741,
617
+ "logits/rejected": 0.13708294928073883,
618
+ "logps/chosen": -503.0716857910156,
619
+ "logps/rejected": -606.0911865234375,
620
+ "loss": 0.2983,
621
+ "rewards/accuracies": 0.6499999761581421,
622
+ "rewards/chosen": -0.18081562221050262,
623
+ "rewards/margins": 0.11758317053318024,
624
+ "rewards/rejected": -0.29839879274368286,
625
+ "step": 430
626
+ },
627
+ {
628
+ "epoch": 0.94,
629
+ "learning_rate": 5.437272047405712e-08,
630
+ "logits/chosen": 0.11100079119205475,
631
+ "logits/rejected": 0.13695240020751953,
632
+ "logps/chosen": -530.9528198242188,
633
+ "logps/rejected": -668.3643798828125,
634
+ "loss": 0.3095,
635
+ "rewards/accuracies": 0.668749988079071,
636
+ "rewards/chosen": -0.2087739259004593,
637
+ "rewards/margins": 0.13447535037994385,
638
+ "rewards/rejected": -0.34324929118156433,
639
+ "step": 440
640
+ },
641
+ {
642
+ "epoch": 0.96,
643
+ "learning_rate": 2.251839967945535e-08,
644
+ "logits/chosen": 0.022059569135308266,
645
+ "logits/rejected": 0.12616530060768127,
646
+ "logps/chosen": -492.80181884765625,
647
+ "logps/rejected": -643.1315307617188,
648
+ "loss": 0.2834,
649
+ "rewards/accuracies": 0.706250011920929,
650
+ "rewards/chosen": -0.18623578548431396,
651
+ "rewards/margins": 0.14532844722270966,
652
+ "rewards/rejected": -0.3315642178058624,
653
+ "step": 450
654
+ },
655
+ {
656
+ "epoch": 0.98,
657
+ "learning_rate": 4.453449766758933e-09,
658
+ "logits/chosen": 0.10801200568675995,
659
+ "logits/rejected": 0.11458040773868561,
660
+ "logps/chosen": -531.0709228515625,
661
+ "logps/rejected": -645.5343627929688,
662
+ "loss": 0.3043,
663
+ "rewards/accuracies": 0.6937500238418579,
664
+ "rewards/chosen": -0.1950865238904953,
665
+ "rewards/margins": 0.11565764993429184,
666
+ "rewards/rejected": -0.3107442259788513,
667
+ "step": 460
668
+ },
669
+ {
670
+ "epoch": 1.0,
671
+ "step": 468,
672
+ "total_flos": 0.0,
673
+ "train_loss": 0.2399272450015076,
674
+ "train_runtime": 4417.3525,
675
+ "train_samples_per_second": 3.396,
676
+ "train_steps_per_second": 0.106
677
+ }
678
+ ],
679
+ "logging_steps": 10,
680
+ "max_steps": 468,
681
+ "num_input_tokens_seen": 0,
682
+ "num_train_epochs": 1,
683
+ "save_steps": 100,
684
+ "total_flos": 0.0,
685
+ "train_batch_size": 4,
686
+ "trial_name": null,
687
+ "trial_params": null
688
+ }
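Since the README's "Training results" section is empty, the per-step DPO metrics live only in `trainer_state.json`. A minimal sketch for extracting them, relying only on the `log_history` entries shown above:

```python
# Minimal sketch: pulls the logged DPO metrics out of trainer_state.json.
import json

state = json.load(open("trainer_state.json"))

for entry in state["log_history"]:
    if "rewards/margins" in entry:    # the final entry is a summary without these keys
        print(entry["step"],
              entry["loss"],
              entry["rewards/accuracies"],
              entry["rewards/margins"])
```

Per the entries above, the reward margin climbs from roughly zero at the first logging steps to about 0.1 to 0.15 by the end of the run, while reward accuracies settle around 0.65 to 0.7.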