jikaixuan committed on
Commit
bbea48f
1 Parent(s): 924c46f

Model save

Browse files
Files changed (6) hide show
  1. README.md +14 -14
  2. adapter_model.safetensors +1 -1
  3. all_results.json +18 -18
  4. eval_results.json +14 -14
  5. train_results.json +4 -4
  6. trainer_state.json +1403 -635
README.md CHANGED
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.5199
19
- - Rewards/chosen: -0.1238
20
- - Rewards/rejected: -1.1258
21
- - Rewards/accuracies: 0.7300
22
- - Rewards/margins: 1.0020
23
- - Logps/rejected: -270.5574
24
- - Logps/chosen: -285.4951
25
- - Logits/rejected: -2.8178
26
- - Logits/chosen: -2.8221
27
- - Use Label: 0.0
28
- - Pred Label: 0.0
29
 
30
  ## Model description
31
 
@@ -50,8 +50,8 @@ The following hyperparameters were used during training:
50
  - seed: 42
51
  - distributed_type: multi-GPU
52
  - num_devices: 4
53
- - gradient_accumulation_steps: 8
54
- - total_train_batch_size: 128
55
  - total_eval_batch_size: 16
56
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
57
  - lr_scheduler_type: linear
@@ -62,7 +62,7 @@ The following hyperparameters were used during training:
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
- | 0.4951 | 1.0 | 477 | 0.5199 | -0.1238 | -1.1258 | 0.7300 | 1.0020 | -270.5574 | -285.4951 | -2.8178 | -2.8221 | 0.0 | 0.0 |
66
 
67
 
68
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.0116
19
+ - Rewards/chosen: -1343.7761
20
+ - Rewards/rejected: -1133.7241
21
+ - Rewards/accuracies: 0.4740
22
+ - Rewards/margins: -210.0521
23
+ - Logps/rejected: -11596.5400
24
+ - Logps/chosen: -13722.0166
25
+ - Logits/rejected: 13.8132
26
+ - Logits/chosen: 13.8244
27
+ - Use Label: 1746.1600
28
+ - Pred Label: 14285.8398
29
 
30
  ## Model description
31
 
 
50
  - seed: 42
51
  - distributed_type: multi-GPU
52
  - num_devices: 4
53
+ - gradient_accumulation_steps: 4
54
+ - total_train_batch_size: 64
55
  - total_eval_batch_size: 16
56
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
57
  - lr_scheduler_type: linear
 
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
+ | 0.0144 | 1.0 | 955 | 0.0116 | -1343.7761 | -1133.7241 | 0.4740 | -210.0521 | -11596.5400 | -13722.0166 | 13.8132 | 13.8244 | 1742.1600 | 13789.8398 |
66
 
67
 
68
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:010a45605f37a25b58670b8f7dc01da25be5c46d27e9088558ce7270eeaff907
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81368c4cfad165c13cb766c3a194e4536b96a4e23b23db140f17c248244027e5
3
  size 218138576
all_results.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.822110891342163,
4
- "eval_logits/rejected": -2.8178136348724365,
5
- "eval_logps/chosen": -285.4951171875,
6
- "eval_logps/rejected": -270.55743408203125,
7
- "eval_loss": 0.5198934078216553,
8
- "eval_pred_label": 0.0,
9
- "eval_rewards/accuracies": 0.7300000190734863,
10
- "eval_rewards/chosen": -0.12377375364303589,
11
- "eval_rewards/margins": 1.001997470855713,
12
- "eval_rewards/rejected": -1.1257712841033936,
13
- "eval_runtime": 453.8128,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.407,
16
- "eval_steps_per_second": 0.275,
17
- "eval_use_label": 0.0,
18
- "train_loss": 0.5321606655040877,
19
- "train_runtime": 24451.2028,
20
  "train_samples": 61135,
21
- "train_samples_per_second": 2.5,
22
- "train_steps_per_second": 0.02
23
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": 13.824411392211914,
4
+ "eval_logits/rejected": 13.813151359558105,
5
+ "eval_logps/chosen": -13722.0166015625,
6
+ "eval_logps/rejected": -11596.5400390625,
7
+ "eval_loss": 0.011624496430158615,
8
+ "eval_pred_label": 14285.83984375,
9
+ "eval_rewards/accuracies": 0.4740000069141388,
10
+ "eval_rewards/chosen": -1343.776123046875,
11
+ "eval_rewards/margins": -210.05210876464844,
12
+ "eval_rewards/rejected": -1133.72412109375,
13
+ "eval_runtime": 450.0493,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.444,
16
+ "eval_steps_per_second": 0.278,
17
+ "eval_use_label": 1746.1600341796875,
18
+ "train_loss": 0.08065580570807007,
19
+ "train_runtime": 25025.0638,
20
  "train_samples": 61135,
21
+ "train_samples_per_second": 2.443,
22
+ "train_steps_per_second": 0.038
23
  }
eval_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.822110891342163,
4
- "eval_logits/rejected": -2.8178136348724365,
5
- "eval_logps/chosen": -285.4951171875,
6
- "eval_logps/rejected": -270.55743408203125,
7
- "eval_loss": 0.5198934078216553,
8
- "eval_pred_label": 0.0,
9
- "eval_rewards/accuracies": 0.7300000190734863,
10
- "eval_rewards/chosen": -0.12377375364303589,
11
- "eval_rewards/margins": 1.001997470855713,
12
- "eval_rewards/rejected": -1.1257712841033936,
13
- "eval_runtime": 453.8128,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.407,
16
- "eval_steps_per_second": 0.275,
17
- "eval_use_label": 0.0
18
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": 13.824411392211914,
4
+ "eval_logits/rejected": 13.813151359558105,
5
+ "eval_logps/chosen": -13722.0166015625,
6
+ "eval_logps/rejected": -11596.5400390625,
7
+ "eval_loss": 0.011624496430158615,
8
+ "eval_pred_label": 14285.83984375,
9
+ "eval_rewards/accuracies": 0.4740000069141388,
10
+ "eval_rewards/chosen": -1343.776123046875,
11
+ "eval_rewards/margins": -210.05210876464844,
12
+ "eval_rewards/rejected": -1133.72412109375,
13
+ "eval_runtime": 450.0493,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.444,
16
+ "eval_steps_per_second": 0.278,
17
+ "eval_use_label": 1746.1600341796875
18
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.5321606655040877,
4
- "train_runtime": 24451.2028,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 2.5,
7
- "train_steps_per_second": 0.02
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.08065580570807007,
4
+ "train_runtime": 25025.0638,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 2.443,
7
+ "train_steps_per_second": 0.038
8
  }
trainer_state.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.998691442030882,
5
  "eval_steps": 100,
6
- "global_step": 477,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 1.0416666666666667e-06,
14
- "logits/chosen": -2.9089105129241943,
15
- "logits/rejected": -2.8982176780700684,
16
- "logps/chosen": -328.48699951171875,
17
- "logps/rejected": -294.0901794433594,
18
  "loss": 0.6931,
19
  "pred_label": 0.0,
20
  "rewards/accuracies": 0.0,
@@ -22,790 +22,1558 @@
22
  "rewards/margins": 0.0,
23
  "rewards/rejected": 0.0,
24
  "step": 1,
25
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  },
27
  {
28
  "epoch": 0.02,
29
  "learning_rate": 1.0416666666666668e-05,
30
- "logits/chosen": -2.802924633026123,
31
- "logits/rejected": -2.845370292663574,
32
- "logps/chosen": -274.2305908203125,
33
- "logps/rejected": -257.4792785644531,
34
- "loss": 0.6894,
35
  "pred_label": 0.0,
36
- "rewards/accuracies": 0.4965277910232544,
37
- "rewards/chosen": 0.0021643945947289467,
38
- "rewards/margins": 0.006878577638417482,
39
- "rewards/rejected": -0.004714183043688536,
40
- "step": 10,
41
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  },
43
  {
44
  "epoch": 0.04,
45
  "learning_rate": 2.0833333333333336e-05,
46
- "logits/chosen": -2.8522541522979736,
47
- "logits/rejected": -2.842360019683838,
48
- "logps/chosen": -279.8498840332031,
49
- "logps/rejected": -264.8490905761719,
50
- "loss": 0.6557,
51
- "pred_label": 0.0,
52
- "rewards/accuracies": 0.65625,
53
- "rewards/chosen": 0.02565639279782772,
54
- "rewards/margins": 0.10996748507022858,
55
- "rewards/rejected": -0.0843110978603363,
56
- "step": 20,
57
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  },
59
  {
60
  "epoch": 0.06,
61
  "learning_rate": 3.125e-05,
62
- "logits/chosen": -2.8415942192077637,
63
- "logits/rejected": -2.829390525817871,
64
- "logps/chosen": -283.43377685546875,
65
- "logps/rejected": -275.16021728515625,
66
- "loss": 0.6152,
67
- "pred_label": 0.0,
68
- "rewards/accuracies": 0.668749988079071,
69
- "rewards/chosen": -0.017193807289004326,
70
- "rewards/margins": 0.2977373003959656,
71
- "rewards/rejected": -0.31493109464645386,
72
- "step": 30,
73
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  },
75
  {
76
  "epoch": 0.08,
77
  "learning_rate": 4.166666666666667e-05,
78
- "logits/chosen": -2.808344841003418,
79
- "logits/rejected": -2.8009369373321533,
80
- "logps/chosen": -299.697998046875,
81
- "logps/rejected": -281.28240966796875,
82
- "loss": 0.5509,
83
- "pred_label": 0.0,
84
- "rewards/accuracies": 0.734375,
85
- "rewards/chosen": 0.011143045499920845,
86
- "rewards/margins": 0.5404817461967468,
87
- "rewards/rejected": -0.5293387174606323,
88
- "step": 40,
89
- "use_label": 0.0
90
  },
91
  {
92
- "epoch": 0.1,
93
- "learning_rate": 4.976689976689977e-05,
94
- "logits/chosen": -2.735532283782959,
95
- "logits/rejected": -2.762310743331909,
96
- "logps/chosen": -284.2992248535156,
97
- "logps/rejected": -285.3406982421875,
98
- "loss": 0.5757,
99
- "pred_label": 0.0,
100
  "rewards/accuracies": 0.731249988079071,
101
- "rewards/chosen": 0.06582433730363846,
102
- "rewards/margins": 0.6093484163284302,
103
- "rewards/rejected": -0.5435240864753723,
104
- "step": 50,
105
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  },
107
  {
108
  "epoch": 0.13,
109
- "learning_rate": 4.86013986013986e-05,
110
- "logits/chosen": -2.7604079246520996,
111
- "logits/rejected": -2.749760389328003,
112
- "logps/chosen": -271.2710876464844,
113
- "logps/rejected": -253.1558074951172,
114
- "loss": 0.5778,
115
- "pred_label": 0.0,
116
- "rewards/accuracies": 0.690625011920929,
117
- "rewards/chosen": 0.08226754516363144,
118
- "rewards/margins": 0.5549899935722351,
119
- "rewards/rejected": -0.47272244095802307,
120
- "step": 60,
121
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  },
123
  {
124
  "epoch": 0.15,
125
- "learning_rate": 4.7435897435897435e-05,
126
- "logits/chosen": -2.825266122817993,
127
- "logits/rejected": -2.8063371181488037,
128
- "logps/chosen": -282.8874816894531,
129
- "logps/rejected": -260.37786865234375,
130
- "loss": 0.5591,
131
- "pred_label": 0.0,
132
- "rewards/accuracies": 0.6968749761581421,
133
- "rewards/chosen": -0.020037399604916573,
134
- "rewards/margins": 0.611237108707428,
135
- "rewards/rejected": -0.6312744617462158,
136
- "step": 70,
137
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  },
139
  {
140
  "epoch": 0.17,
141
- "learning_rate": 4.6270396270396274e-05,
142
- "logits/chosen": -2.777280807495117,
143
- "logits/rejected": -2.760341167449951,
144
- "logps/chosen": -278.3568420410156,
145
- "logps/rejected": -281.4912109375,
146
- "loss": 0.5484,
147
- "pred_label": 0.0,
148
- "rewards/accuracies": 0.6812499761581421,
149
- "rewards/chosen": 0.02771860361099243,
150
- "rewards/margins": 0.6373428702354431,
151
- "rewards/rejected": -0.6096242666244507,
152
- "step": 80,
153
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  },
155
  {
156
  "epoch": 0.19,
157
- "learning_rate": 4.5104895104895105e-05,
158
- "logits/chosen": -2.7895703315734863,
159
- "logits/rejected": -2.7826099395751953,
160
- "logps/chosen": -274.6077575683594,
161
- "logps/rejected": -265.6002197265625,
162
- "loss": 0.5476,
163
- "pred_label": 0.0,
164
- "rewards/accuracies": 0.71875,
165
- "rewards/chosen": 0.15948975086212158,
166
- "rewards/margins": 0.703137218952179,
167
- "rewards/rejected": -0.5436475276947021,
168
- "step": 90,
169
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  },
171
  {
172
  "epoch": 0.21,
173
- "learning_rate": 4.3939393939393944e-05,
174
- "logits/chosen": -2.842555046081543,
175
- "logits/rejected": -2.8219830989837646,
176
- "logps/chosen": -282.922607421875,
177
- "logps/rejected": -272.0762939453125,
178
- "loss": 0.5218,
179
- "pred_label": 0.0,
180
- "rewards/accuracies": 0.753125011920929,
181
- "rewards/chosen": 0.1369757205247879,
182
- "rewards/margins": 0.8804744482040405,
183
- "rewards/rejected": -0.7434987425804138,
184
- "step": 100,
185
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  },
187
  {
188
  "epoch": 0.23,
189
- "learning_rate": 4.2773892773892776e-05,
190
- "logits/chosen": -2.7973036766052246,
191
- "logits/rejected": -2.8159468173980713,
192
- "logps/chosen": -287.54254150390625,
193
- "logps/rejected": -269.5066223144531,
194
- "loss": 0.5455,
195
- "pred_label": 0.0,
196
- "rewards/accuracies": 0.734375,
197
- "rewards/chosen": 0.057723164558410645,
198
- "rewards/margins": 0.7660588622093201,
199
- "rewards/rejected": -0.7083355784416199,
200
- "step": 110,
201
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  },
203
  {
204
  "epoch": 0.25,
205
- "learning_rate": 4.1608391608391614e-05,
206
- "logits/chosen": -2.7736809253692627,
207
- "logits/rejected": -2.7753469944000244,
208
- "logps/chosen": -292.09246826171875,
209
- "logps/rejected": -252.3135986328125,
210
- "loss": 0.5353,
211
- "pred_label": 0.0,
212
- "rewards/accuracies": 0.7250000238418579,
213
- "rewards/chosen": -0.07172087579965591,
214
- "rewards/margins": 0.8031927347183228,
215
- "rewards/rejected": -0.8749135732650757,
216
- "step": 120,
217
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  },
219
  {
220
  "epoch": 0.27,
221
- "learning_rate": 4.0442890442890446e-05,
222
- "logits/chosen": -2.7675487995147705,
223
- "logits/rejected": -2.761944532394409,
224
- "logps/chosen": -271.2415771484375,
225
- "logps/rejected": -269.3936767578125,
226
- "loss": 0.5413,
227
- "pred_label": 0.0,
228
- "rewards/accuracies": 0.703125,
229
- "rewards/chosen": -0.16168196499347687,
230
- "rewards/margins": 0.7922881841659546,
231
- "rewards/rejected": -0.9539702534675598,
232
- "step": 130,
233
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  },
235
  {
236
  "epoch": 0.29,
237
- "learning_rate": 3.9277389277389285e-05,
238
- "logits/chosen": -2.821808338165283,
239
- "logits/rejected": -2.828434467315674,
240
- "logps/chosen": -293.92376708984375,
241
- "logps/rejected": -278.75469970703125,
242
- "loss": 0.5249,
243
- "pred_label": 0.0,
244
- "rewards/accuracies": 0.7593749761581421,
245
- "rewards/chosen": -0.10061223804950714,
246
- "rewards/margins": 0.9257994890213013,
247
- "rewards/rejected": -1.026411771774292,
248
- "step": 140,
249
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  },
251
  {
252
  "epoch": 0.31,
253
- "learning_rate": 3.811188811188811e-05,
254
- "logits/chosen": -2.8263511657714844,
255
- "logits/rejected": -2.803342819213867,
256
- "logps/chosen": -280.3300476074219,
257
- "logps/rejected": -244.6310577392578,
258
- "loss": 0.5312,
259
- "pred_label": 0.0,
260
- "rewards/accuracies": 0.721875011920929,
261
- "rewards/chosen": -0.2029426395893097,
262
- "rewards/margins": 0.7571284174919128,
263
- "rewards/rejected": -0.9600710868835449,
264
- "step": 150,
265
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  },
267
  {
268
  "epoch": 0.33,
269
- "learning_rate": 3.694638694638695e-05,
270
- "logits/chosen": -2.7946958541870117,
271
- "logits/rejected": -2.748753309249878,
272
- "logps/chosen": -282.52154541015625,
273
- "logps/rejected": -296.4395446777344,
274
- "loss": 0.5058,
275
- "pred_label": 0.0,
276
- "rewards/accuracies": 0.762499988079071,
277
- "rewards/chosen": -0.10657407343387604,
278
- "rewards/margins": 1.0698082447052002,
279
- "rewards/rejected": -1.176382303237915,
280
- "step": 160,
281
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  },
283
  {
284
  "epoch": 0.36,
285
- "learning_rate": 3.578088578088578e-05,
286
- "logits/chosen": -2.7990992069244385,
287
- "logits/rejected": -2.7839698791503906,
288
- "logps/chosen": -280.283447265625,
289
- "logps/rejected": -245.1365966796875,
290
- "loss": 0.5741,
291
- "pred_label": 0.0,
292
- "rewards/accuracies": 0.675000011920929,
293
- "rewards/chosen": -0.0219185221940279,
294
- "rewards/margins": 0.7351241707801819,
295
- "rewards/rejected": -0.7570425868034363,
296
- "step": 170,
297
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  },
299
  {
300
  "epoch": 0.38,
301
- "learning_rate": 3.461538461538462e-05,
302
- "logits/chosen": -2.8004555702209473,
303
- "logits/rejected": -2.80672025680542,
304
- "logps/chosen": -286.1470031738281,
305
- "logps/rejected": -280.5009765625,
306
- "loss": 0.5354,
307
- "pred_label": 0.0,
308
- "rewards/accuracies": 0.768750011920929,
309
- "rewards/chosen": -0.1152573823928833,
310
- "rewards/margins": 0.79168301820755,
311
- "rewards/rejected": -0.9069403409957886,
312
- "step": 180,
313
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  },
315
  {
316
  "epoch": 0.4,
317
- "learning_rate": 3.344988344988345e-05,
318
- "logits/chosen": -2.820003032684326,
319
- "logits/rejected": -2.789689302444458,
320
- "logps/chosen": -281.3421936035156,
321
- "logps/rejected": -278.36883544921875,
322
- "loss": 0.5092,
323
- "pred_label": 0.0,
324
- "rewards/accuracies": 0.762499988079071,
325
- "rewards/chosen": -0.06256841123104095,
326
- "rewards/margins": 0.9592651128768921,
327
- "rewards/rejected": -1.0218336582183838,
328
- "step": 190,
329
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  },
331
  {
332
  "epoch": 0.42,
333
- "learning_rate": 3.228438228438229e-05,
334
- "logits/chosen": -2.7884361743927,
335
- "logits/rejected": -2.782519578933716,
336
- "logps/chosen": -294.6650390625,
337
- "logps/rejected": -261.40435791015625,
338
- "loss": 0.5052,
339
- "pred_label": 0.0,
340
- "rewards/accuracies": 0.734375,
341
- "rewards/chosen": 0.02564082108438015,
342
- "rewards/margins": 0.9850121736526489,
343
- "rewards/rejected": -0.9593712687492371,
344
- "step": 200,
345
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  },
347
  {
348
  "epoch": 0.44,
349
- "learning_rate": 3.111888111888112e-05,
350
- "logits/chosen": -2.797853946685791,
351
- "logits/rejected": -2.7864465713500977,
352
- "logps/chosen": -269.8060607910156,
353
- "logps/rejected": -262.01422119140625,
354
- "loss": 0.5258,
355
- "pred_label": 0.0,
356
- "rewards/accuracies": 0.706250011920929,
357
- "rewards/chosen": -0.046703118830919266,
358
- "rewards/margins": 0.7715562582015991,
359
- "rewards/rejected": -0.8182594180107117,
360
- "step": 210,
361
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  },
363
  {
364
  "epoch": 0.46,
365
- "learning_rate": 2.9953379953379956e-05,
366
- "logits/chosen": -2.8102259635925293,
367
- "logits/rejected": -2.813842296600342,
368
- "logps/chosen": -280.2008972167969,
369
- "logps/rejected": -262.47161865234375,
370
- "loss": 0.5326,
371
- "pred_label": 0.0,
372
- "rewards/accuracies": 0.7093750238418579,
373
- "rewards/chosen": 0.038443028926849365,
374
- "rewards/margins": 0.6599202156066895,
375
- "rewards/rejected": -0.6214772462844849,
376
- "step": 220,
377
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  },
379
  {
380
  "epoch": 0.48,
381
- "learning_rate": 2.878787878787879e-05,
382
- "logits/chosen": -2.7520573139190674,
383
- "logits/rejected": -2.744856357574463,
384
- "logps/chosen": -265.61956787109375,
385
- "logps/rejected": -256.87115478515625,
386
- "loss": 0.5328,
387
- "pred_label": 0.0,
388
- "rewards/accuracies": 0.690625011920929,
389
- "rewards/chosen": 0.014087711460888386,
390
- "rewards/margins": 0.7686548233032227,
391
- "rewards/rejected": -0.7545671463012695,
392
- "step": 230,
393
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  },
395
  {
396
  "epoch": 0.5,
397
- "learning_rate": 2.762237762237762e-05,
398
- "logits/chosen": -2.7545018196105957,
399
- "logits/rejected": -2.730861186981201,
400
- "logps/chosen": -256.40753173828125,
401
- "logps/rejected": -260.3459167480469,
402
- "loss": 0.5315,
403
- "pred_label": 0.0,
404
- "rewards/accuracies": 0.7093750238418579,
405
- "rewards/chosen": -0.0070175291039049625,
406
- "rewards/margins": 0.7594768404960632,
407
- "rewards/rejected": -0.7664943933486938,
408
- "step": 240,
409
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  },
411
  {
412
  "epoch": 0.52,
413
- "learning_rate": 2.6456876456876455e-05,
414
- "logits/chosen": -2.8026585578918457,
415
- "logits/rejected": -2.778594493865967,
416
- "logps/chosen": -277.6792907714844,
417
- "logps/rejected": -256.2415771484375,
418
- "loss": 0.5158,
419
- "pred_label": 0.0,
420
- "rewards/accuracies": 0.7406250238418579,
421
- "rewards/chosen": 0.056344062089920044,
422
- "rewards/margins": 0.8976815938949585,
423
- "rewards/rejected": -0.8413375616073608,
424
- "step": 250,
425
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
  },
427
  {
428
  "epoch": 0.54,
429
- "learning_rate": 2.529137529137529e-05,
430
- "logits/chosen": -2.8391366004943848,
431
- "logits/rejected": -2.8148343563079834,
432
- "logps/chosen": -284.32781982421875,
433
- "logps/rejected": -262.647705078125,
434
- "loss": 0.4916,
435
- "pred_label": 0.0,
436
- "rewards/accuracies": 0.7406250238418579,
437
- "rewards/chosen": -0.06821730732917786,
438
- "rewards/margins": 1.0360552072525024,
439
- "rewards/rejected": -1.104272484779358,
440
- "step": 260,
441
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  },
443
  {
444
  "epoch": 0.57,
445
- "learning_rate": 2.4125874125874125e-05,
446
- "logits/chosen": -2.841184139251709,
447
- "logits/rejected": -2.813793659210205,
448
- "logps/chosen": -295.7508544921875,
449
- "logps/rejected": -291.97467041015625,
450
- "loss": 0.5071,
451
- "pred_label": 0.0,
452
- "rewards/accuracies": 0.746874988079071,
453
- "rewards/chosen": -0.13246159255504608,
454
- "rewards/margins": 1.0678224563598633,
455
- "rewards/rejected": -1.2002841234207153,
456
- "step": 270,
457
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  },
459
  {
460
  "epoch": 0.59,
461
- "learning_rate": 2.296037296037296e-05,
462
- "logits/chosen": -2.8141586780548096,
463
- "logits/rejected": -2.803541898727417,
464
- "logps/chosen": -283.2509765625,
465
- "logps/rejected": -250.5514373779297,
466
- "loss": 0.5358,
467
- "pred_label": 0.0,
468
- "rewards/accuracies": 0.715624988079071,
469
- "rewards/chosen": -0.14472146332263947,
470
- "rewards/margins": 0.9390050768852234,
471
- "rewards/rejected": -1.0837266445159912,
472
- "step": 280,
473
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
  },
475
  {
476
  "epoch": 0.61,
477
- "learning_rate": 2.1794871794871795e-05,
478
- "logits/chosen": -2.821655035018921,
479
- "logits/rejected": -2.8060765266418457,
480
- "logps/chosen": -274.44903564453125,
481
- "logps/rejected": -272.57745361328125,
482
- "loss": 0.5206,
483
- "pred_label": 0.0,
484
- "rewards/accuracies": 0.7437499761581421,
485
- "rewards/chosen": -0.06504921615123749,
486
- "rewards/margins": 0.8676969408988953,
487
- "rewards/rejected": -0.9327462315559387,
488
- "step": 290,
489
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  },
491
  {
492
  "epoch": 0.63,
493
- "learning_rate": 2.062937062937063e-05,
494
- "logits/chosen": -2.8019015789031982,
495
- "logits/rejected": -2.801147222518921,
496
- "logps/chosen": -292.081787109375,
497
- "logps/rejected": -301.0537414550781,
498
- "loss": 0.5177,
499
- "pred_label": 0.0,
500
- "rewards/accuracies": 0.734375,
501
- "rewards/chosen": 0.03986026719212532,
502
- "rewards/margins": 0.968209445476532,
503
- "rewards/rejected": -0.9283491969108582,
504
- "step": 300,
505
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
506
  },
507
  {
508
  "epoch": 0.65,
509
- "learning_rate": 1.9463869463869462e-05,
510
- "logits/chosen": -2.809706449508667,
511
- "logits/rejected": -2.781670570373535,
512
- "logps/chosen": -293.81451416015625,
513
- "logps/rejected": -256.2749938964844,
514
- "loss": 0.5066,
515
- "pred_label": 0.0,
516
- "rewards/accuracies": 0.7281249761581421,
517
- "rewards/chosen": -0.08690959960222244,
518
- "rewards/margins": 0.949812114238739,
519
- "rewards/rejected": -1.0367217063903809,
520
- "step": 310,
521
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
  },
523
  {
524
  "epoch": 0.67,
525
- "learning_rate": 1.8298368298368298e-05,
526
- "logits/chosen": -2.797341823577881,
527
- "logits/rejected": -2.787775754928589,
528
- "logps/chosen": -269.12139892578125,
529
- "logps/rejected": -266.01080322265625,
530
- "loss": 0.4996,
531
- "pred_label": 0.0,
532
- "rewards/accuracies": 0.768750011920929,
533
- "rewards/chosen": 0.0034091435372829437,
534
- "rewards/margins": 1.1171700954437256,
535
- "rewards/rejected": -1.1137609481811523,
536
- "step": 320,
537
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
  },
539
  {
540
  "epoch": 0.69,
541
- "learning_rate": 1.7132867132867133e-05,
542
- "logits/chosen": -2.8247230052948,
543
- "logits/rejected": -2.80410099029541,
544
- "logps/chosen": -299.09442138671875,
545
- "logps/rejected": -273.0837707519531,
546
- "loss": 0.5236,
547
- "pred_label": 0.0,
548
- "rewards/accuracies": 0.731249988079071,
549
- "rewards/chosen": -0.04382320120930672,
550
- "rewards/margins": 0.871612548828125,
551
- "rewards/rejected": -0.915435791015625,
552
- "step": 330,
553
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
  },
555
  {
556
  "epoch": 0.71,
557
- "learning_rate": 1.5967365967365968e-05,
558
- "logits/chosen": -2.806962251663208,
559
- "logits/rejected": -2.79986572265625,
560
- "logps/chosen": -284.2406005859375,
561
- "logps/rejected": -258.4219665527344,
562
- "loss": 0.5258,
563
- "pred_label": 0.0,
564
- "rewards/accuracies": 0.7093750238418579,
565
- "rewards/chosen": -0.023638445883989334,
566
- "rewards/margins": 0.8371860384941101,
567
- "rewards/rejected": -0.8608245849609375,
568
- "step": 340,
569
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  },
571
  {
572
  "epoch": 0.73,
573
- "learning_rate": 1.4801864801864803e-05,
574
- "logits/chosen": -2.803683280944824,
575
- "logits/rejected": -2.7878894805908203,
576
- "logps/chosen": -263.1024169921875,
577
- "logps/rejected": -239.3209686279297,
578
- "loss": 0.4963,
579
- "pred_label": 0.0,
580
- "rewards/accuracies": 0.765625,
581
- "rewards/chosen": -0.0065319957211613655,
582
- "rewards/margins": 1.0474125146865845,
583
- "rewards/rejected": -1.05394446849823,
584
- "step": 350,
585
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
586
  },
587
  {
588
  "epoch": 0.75,
589
- "learning_rate": 1.3636363636363637e-05,
590
- "logits/chosen": -2.8253402709960938,
591
- "logits/rejected": -2.817469596862793,
592
- "logps/chosen": -280.67022705078125,
593
- "logps/rejected": -258.00579833984375,
594
- "loss": 0.4967,
595
- "pred_label": 0.0,
596
- "rewards/accuracies": 0.734375,
597
- "rewards/chosen": -0.015683341771364212,
598
- "rewards/margins": 1.0513224601745605,
599
- "rewards/rejected": -1.0670057535171509,
600
- "step": 360,
601
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
602
  },
603
  {
604
  "epoch": 0.77,
605
- "learning_rate": 1.2470862470862472e-05,
606
- "logits/chosen": -2.793775796890259,
607
- "logits/rejected": -2.796365261077881,
608
- "logps/chosen": -302.306884765625,
609
- "logps/rejected": -273.70635986328125,
610
- "loss": 0.5115,
611
- "pred_label": 0.0,
612
- "rewards/accuracies": 0.737500011920929,
613
- "rewards/chosen": 0.010395345278084278,
614
- "rewards/margins": 0.973157525062561,
615
- "rewards/rejected": -0.9627620577812195,
616
- "step": 370,
617
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
618
  },
619
  {
620
  "epoch": 0.8,
621
- "learning_rate": 1.1305361305361307e-05,
622
- "logits/chosen": -2.8007330894470215,
623
- "logits/rejected": -2.809499740600586,
624
- "logps/chosen": -277.95599365234375,
625
- "logps/rejected": -271.8043212890625,
626
- "loss": 0.5023,
627
- "pred_label": 0.0,
628
- "rewards/accuracies": 0.768750011920929,
629
- "rewards/chosen": 0.0019541799556463957,
630
- "rewards/margins": 1.0503606796264648,
631
- "rewards/rejected": -1.0484063625335693,
632
- "step": 380,
633
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
634
  },
635
  {
636
  "epoch": 0.82,
637
- "learning_rate": 1.013986013986014e-05,
638
- "logits/chosen": -2.838869571685791,
639
- "logits/rejected": -2.8424456119537354,
640
- "logps/chosen": -291.6543884277344,
641
- "logps/rejected": -275.7422790527344,
642
- "loss": 0.5223,
643
- "pred_label": 0.0,
644
- "rewards/accuracies": 0.7406250238418579,
645
- "rewards/chosen": 0.009039236232638359,
646
- "rewards/margins": 0.990314781665802,
647
- "rewards/rejected": -0.9812755584716797,
648
- "step": 390,
649
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
650
  },
651
  {
652
  "epoch": 0.84,
653
- "learning_rate": 8.974358974358976e-06,
654
- "logits/chosen": -2.7799577713012695,
655
- "logits/rejected": -2.7700963020324707,
656
- "logps/chosen": -292.00604248046875,
657
- "logps/rejected": -278.8124694824219,
658
- "loss": 0.5059,
659
- "pred_label": 0.0,
660
- "rewards/accuracies": 0.7281249761581421,
661
- "rewards/chosen": -0.08654220402240753,
662
- "rewards/margins": 0.9550994634628296,
663
- "rewards/rejected": -1.0416417121887207,
664
- "step": 400,
665
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
666
  },
667
  {
668
  "epoch": 0.86,
669
- "learning_rate": 7.808857808857809e-06,
670
- "logits/chosen": -2.7790863513946533,
671
- "logits/rejected": -2.7839720249176025,
672
- "logps/chosen": -288.0146484375,
673
- "logps/rejected": -246.18386840820312,
674
- "loss": 0.5193,
675
- "pred_label": 0.0,
676
- "rewards/accuracies": 0.7593749761581421,
677
- "rewards/chosen": -0.0843573808670044,
678
- "rewards/margins": 1.01529860496521,
679
- "rewards/rejected": -1.0996559858322144,
680
- "step": 410,
681
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
682
  },
683
  {
684
  "epoch": 0.88,
685
- "learning_rate": 6.643356643356643e-06,
686
- "logits/chosen": -2.8042919635772705,
687
- "logits/rejected": -2.814549446105957,
688
- "logps/chosen": -267.73272705078125,
689
- "logps/rejected": -282.6224060058594,
690
- "loss": 0.5131,
691
- "pred_label": 0.0,
692
- "rewards/accuracies": 0.7250000238418579,
693
- "rewards/chosen": -0.23174908757209778,
694
- "rewards/margins": 0.7961767315864563,
695
- "rewards/rejected": -1.027925729751587,
696
- "step": 420,
697
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
698
  },
699
  {
700
  "epoch": 0.9,
701
- "learning_rate": 5.477855477855478e-06,
702
- "logits/chosen": -2.8008358478546143,
703
- "logits/rejected": -2.827676773071289,
704
- "logps/chosen": -291.685791015625,
705
- "logps/rejected": -269.49896240234375,
706
- "loss": 0.5135,
707
- "pred_label": 0.0,
708
- "rewards/accuracies": 0.7593749761581421,
709
- "rewards/chosen": -0.1730644553899765,
710
- "rewards/margins": 0.9842265248298645,
711
- "rewards/rejected": -1.157291054725647,
712
- "step": 430,
713
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
  },
715
  {
716
  "epoch": 0.92,
717
- "learning_rate": 4.312354312354312e-06,
718
- "logits/chosen": -2.8276031017303467,
719
- "logits/rejected": -2.81488299369812,
720
- "logps/chosen": -265.67242431640625,
721
- "logps/rejected": -261.6026611328125,
722
- "loss": 0.51,
723
- "pred_label": 0.0,
724
- "rewards/accuracies": 0.734375,
725
- "rewards/chosen": -0.08840381354093552,
726
- "rewards/margins": 0.9768926501274109,
727
- "rewards/rejected": -1.0652964115142822,
728
- "step": 440,
729
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730
  },
731
  {
732
  "epoch": 0.94,
733
- "learning_rate": 3.1468531468531472e-06,
734
- "logits/chosen": -2.794529676437378,
735
- "logits/rejected": -2.7928192615509033,
736
- "logps/chosen": -280.88995361328125,
737
- "logps/rejected": -275.9464416503906,
738
- "loss": 0.5128,
739
- "pred_label": 0.0,
740
- "rewards/accuracies": 0.765625,
741
- "rewards/chosen": -0.06786171346902847,
742
- "rewards/margins": 0.9994968175888062,
743
- "rewards/rejected": -1.0673584938049316,
744
- "step": 450,
745
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
  },
747
  {
748
  "epoch": 0.96,
749
- "learning_rate": 1.981351981351981e-06,
750
- "logits/chosen": -2.7951102256774902,
751
- "logits/rejected": -2.7973313331604004,
752
- "logps/chosen": -297.4739990234375,
753
- "logps/rejected": -272.06024169921875,
754
- "loss": 0.4871,
755
- "pred_label": 0.0,
756
- "rewards/accuracies": 0.737500011920929,
757
- "rewards/chosen": -0.08221320062875748,
758
- "rewards/margins": 0.9878571629524231,
759
- "rewards/rejected": -1.0700703859329224,
760
- "step": 460,
761
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
762
  },
763
  {
764
  "epoch": 0.98,
765
- "learning_rate": 8.158508158508159e-07,
766
- "logits/chosen": -2.8256263732910156,
767
- "logits/rejected": -2.7975211143493652,
768
- "logps/chosen": -278.83154296875,
769
- "logps/rejected": -266.3514099121094,
770
- "loss": 0.4951,
771
- "pred_label": 0.0,
772
- "rewards/accuracies": 0.7593749761581421,
773
- "rewards/chosen": -0.10287537425756454,
774
- "rewards/margins": 1.0982601642608643,
775
- "rewards/rejected": -1.2011353969573975,
776
- "step": 470,
777
- "use_label": 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
778
  },
779
  {
780
  "epoch": 1.0,
781
- "eval_logits/chosen": -2.822110891342163,
782
- "eval_logits/rejected": -2.8178136348724365,
783
- "eval_logps/chosen": -285.4951171875,
784
- "eval_logps/rejected": -270.55743408203125,
785
- "eval_loss": 0.5198934078216553,
786
- "eval_pred_label": 0.0,
787
- "eval_rewards/accuracies": 0.7300000190734863,
788
- "eval_rewards/chosen": -0.12377375364303589,
789
- "eval_rewards/margins": 1.001997470855713,
790
- "eval_rewards/rejected": -1.1257712841033936,
791
- "eval_runtime": 453.8631,
792
- "eval_samples_per_second": 4.407,
793
- "eval_steps_per_second": 0.275,
794
- "eval_use_label": 0.0,
795
- "step": 477
796
  },
797
  {
798
  "epoch": 1.0,
799
- "step": 477,
800
  "total_flos": 0.0,
801
- "train_loss": 0.5321606655040877,
802
- "train_runtime": 24451.2028,
803
- "train_samples_per_second": 2.5,
804
- "train_steps_per_second": 0.02
805
  }
806
  ],
807
  "logging_steps": 10,
808
- "max_steps": 477,
809
  "num_train_epochs": 1,
810
  "save_steps": 50,
811
  "total_flos": 0.0,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9997382884061764,
5
  "eval_steps": 100,
6
+ "global_step": 955,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 5.208333333333334e-07,
14
+ "logits/chosen": -2.980285167694092,
15
+ "logits/rejected": -2.87275767326355,
16
+ "logps/chosen": -313.4390563964844,
17
+ "logps/rejected": -236.1754150390625,
18
  "loss": 0.6931,
19
  "pred_label": 0.0,
20
  "rewards/accuracies": 0.0,
 
22
  "rewards/margins": 0.0,
23
  "rewards/rejected": 0.0,
24
  "step": 1,
25
+ "use_label": 10.0
26
+ },
27
+ {
28
+ "epoch": 0.01,
29
+ "learning_rate": 5.208333333333334e-06,
30
+ "logits/chosen": -2.861464262008667,
31
+ "logits/rejected": -2.907951593399048,
32
+ "logps/chosen": -323.6517333984375,
33
+ "logps/rejected": -284.9451904296875,
34
+ "loss": 0.6921,
35
+ "pred_label": 0.0,
36
+ "rewards/accuracies": 0.4375,
37
+ "rewards/chosen": 0.0027037172112613916,
38
+ "rewards/margins": 0.001292458618991077,
39
+ "rewards/rejected": 0.0014112575445324183,
40
+ "step": 10,
41
+ "use_label": 90.0
42
  },
43
  {
44
  "epoch": 0.02,
45
  "learning_rate": 1.0416666666666668e-05,
46
+ "logits/chosen": -2.7527613639831543,
47
+ "logits/rejected": -2.796025037765503,
48
+ "logps/chosen": -236.6191864013672,
49
+ "logps/rejected": -242.22232055664062,
50
+ "loss": 0.6831,
51
  "pred_label": 0.0,
52
+ "rewards/accuracies": 0.5874999761581421,
53
+ "rewards/chosen": 0.007748906500637531,
54
+ "rewards/margins": 0.018490687012672424,
55
+ "rewards/rejected": -0.010741781443357468,
56
+ "step": 20,
57
+ "use_label": 242.0
58
+ },
59
+ {
60
+ "epoch": 0.03,
61
+ "learning_rate": 1.5625e-05,
62
+ "logits/chosen": -2.8575313091278076,
63
+ "logits/rejected": -2.829209804534912,
64
+ "logps/chosen": -278.3554992675781,
65
+ "logps/rejected": -252.61123657226562,
66
+ "loss": 0.6574,
67
+ "pred_label": 0.0,
68
+ "rewards/accuracies": 0.6499999761581421,
69
+ "rewards/chosen": 0.026403894647955894,
70
+ "rewards/margins": 0.09085250645875931,
71
+ "rewards/rejected": -0.06444860994815826,
72
+ "step": 30,
73
+ "use_label": 402.0
74
  },
75
  {
76
  "epoch": 0.04,
77
  "learning_rate": 2.0833333333333336e-05,
78
+ "logits/chosen": -2.840946674346924,
79
+ "logits/rejected": -2.8493659496307373,
80
+ "logps/chosen": -281.32928466796875,
81
+ "logps/rejected": -277.8607482910156,
82
+ "loss": 0.6339,
83
+ "pred_label": 0.4749999940395355,
84
+ "rewards/accuracies": 0.6875,
85
+ "rewards/chosen": 0.02641097828745842,
86
+ "rewards/margins": 0.2079576551914215,
87
+ "rewards/rejected": -0.1815466731786728,
88
+ "step": 40,
89
+ "use_label": 561.5250244140625
90
+ },
91
+ {
92
+ "epoch": 0.05,
93
+ "learning_rate": 2.604166666666667e-05,
94
+ "logits/chosen": -2.8537254333496094,
95
+ "logits/rejected": -2.8391127586364746,
96
+ "logps/chosen": -266.79296875,
97
+ "logps/rejected": -262.0001220703125,
98
+ "loss": 0.5836,
99
+ "pred_label": 5.775000095367432,
100
+ "rewards/accuracies": 0.643750011920929,
101
+ "rewards/chosen": -0.06846104562282562,
102
+ "rewards/margins": 0.33990827202796936,
103
+ "rewards/rejected": -0.4083693027496338,
104
+ "step": 50,
105
+ "use_label": 716.2249755859375
106
  },
107
  {
108
  "epoch": 0.06,
109
  "learning_rate": 3.125e-05,
110
+ "logits/chosen": -2.8152918815612793,
111
+ "logits/rejected": -2.804291009902954,
112
+ "logps/chosen": -301.41326904296875,
113
+ "logps/rejected": -291.53997802734375,
114
+ "loss": 0.5613,
115
+ "pred_label": 28.600000381469727,
116
+ "rewards/accuracies": 0.699999988079071,
117
+ "rewards/chosen": -0.09980294108390808,
118
+ "rewards/margins": 0.4436502456665039,
119
+ "rewards/rejected": -0.5434532165527344,
120
+ "step": 60,
121
+ "use_label": 853.4000244140625
122
+ },
123
+ {
124
+ "epoch": 0.07,
125
+ "learning_rate": 3.6458333333333336e-05,
126
+ "logits/chosen": -2.8159656524658203,
127
+ "logits/rejected": -2.807382345199585,
128
+ "logps/chosen": -295.85113525390625,
129
+ "logps/rejected": -281.4297180175781,
130
+ "loss": 0.4736,
131
+ "pred_label": 72.82499694824219,
132
+ "rewards/accuracies": 0.731249988079071,
133
+ "rewards/chosen": -0.15376296639442444,
134
+ "rewards/margins": 0.6926594972610474,
135
+ "rewards/rejected": -0.8464224934577942,
136
+ "step": 70,
137
+ "use_label": 969.1749877929688
138
  },
139
  {
140
  "epoch": 0.08,
141
  "learning_rate": 4.166666666666667e-05,
142
+ "logits/chosen": -2.760671377182007,
143
+ "logits/rejected": -2.745089292526245,
144
+ "logps/chosen": -309.682861328125,
145
+ "logps/rejected": -294.1726989746094,
146
+ "loss": 0.3682,
147
+ "pred_label": 129.4499969482422,
148
+ "rewards/accuracies": 0.731249988079071,
149
+ "rewards/chosen": -0.4377492070198059,
150
+ "rewards/margins": 1.0782606601715088,
151
+ "rewards/rejected": -1.516010046005249,
152
+ "step": 80,
153
+ "use_label": 1072.550048828125
154
  },
155
  {
156
+ "epoch": 0.09,
157
+ "learning_rate": 4.6875e-05,
158
+ "logits/chosen": -2.689037799835205,
159
+ "logits/rejected": -2.7456631660461426,
160
+ "logps/chosen": -298.6680603027344,
161
+ "logps/rejected": -281.3171081542969,
162
+ "loss": 0.3626,
163
+ "pred_label": 213.02499389648438,
164
  "rewards/accuracies": 0.731249988079071,
165
+ "rewards/chosen": -0.28849169611930847,
166
+ "rewards/margins": 1.2157728672027588,
167
+ "rewards/rejected": -1.5042643547058105,
168
+ "step": 90,
169
+ "use_label": 1148.9749755859375
170
+ },
171
+ {
172
+ "epoch": 0.1,
173
+ "learning_rate": 4.976717112922003e-05,
174
+ "logits/chosen": -2.722339153289795,
175
+ "logits/rejected": -2.718428611755371,
176
+ "logps/chosen": -287.2553405761719,
177
+ "logps/rejected": -325.00335693359375,
178
+ "loss": 0.3168,
179
+ "pred_label": 303.125,
180
+ "rewards/accuracies": 0.71875,
181
+ "rewards/chosen": -1.3123645782470703,
182
+ "rewards/margins": 1.8343286514282227,
183
+ "rewards/rejected": -3.146693468093872,
184
+ "step": 100,
185
+ "use_label": 1218.875
186
+ },
187
+ {
188
+ "epoch": 0.12,
189
+ "learning_rate": 4.918509895227008e-05,
190
+ "logits/chosen": -2.694249391555786,
191
+ "logits/rejected": -2.633723497390747,
192
+ "logps/chosen": -288.16387939453125,
193
+ "logps/rejected": -293.7809143066406,
194
+ "loss": 0.2607,
195
+ "pred_label": 402.625,
196
+ "rewards/accuracies": 0.675000011920929,
197
+ "rewards/chosen": -2.3489272594451904,
198
+ "rewards/margins": 2.1118221282958984,
199
+ "rewards/rejected": -4.46074914932251,
200
+ "step": 110,
201
+ "use_label": 1279.375
202
  },
203
  {
204
  "epoch": 0.13,
205
+ "learning_rate": 4.860302677532014e-05,
206
+ "logits/chosen": -2.718721866607666,
207
+ "logits/rejected": -2.699587345123291,
208
+ "logps/chosen": -292.71112060546875,
209
+ "logps/rejected": -279.4311218261719,
210
+ "loss": 0.2879,
211
+ "pred_label": 507.5,
212
+ "rewards/accuracies": 0.6937500238418579,
213
+ "rewards/chosen": -1.3198258876800537,
214
+ "rewards/margins": 1.854914903640747,
215
+ "rewards/rejected": -3.1747405529022217,
216
+ "step": 120,
217
+ "use_label": 1334.5
218
+ },
219
+ {
220
+ "epoch": 0.14,
221
+ "learning_rate": 4.80209545983702e-05,
222
+ "logits/chosen": -2.7755086421966553,
223
+ "logits/rejected": -2.7087435722351074,
224
+ "logps/chosen": -329.43267822265625,
225
+ "logps/rejected": -308.2383728027344,
226
+ "loss": 0.2811,
227
+ "pred_label": 611.7249755859375,
228
+ "rewards/accuracies": 0.65625,
229
+ "rewards/chosen": -2.216526508331299,
230
+ "rewards/margins": 1.2961757183074951,
231
+ "rewards/rejected": -3.512702226638794,
232
+ "step": 130,
233
+ "use_label": 1390.2750244140625
234
  },
235
  {
236
  "epoch": 0.15,
237
+ "learning_rate": 4.743888242142026e-05,
238
+ "logits/chosen": -2.6767191886901855,
239
+ "logits/rejected": -2.643078327178955,
240
+ "logps/chosen": -318.53924560546875,
241
+ "logps/rejected": -322.80078125,
242
+ "loss": 0.1985,
243
+ "pred_label": 719.9749755859375,
244
+ "rewards/accuracies": 0.6812499761581421,
245
+ "rewards/chosen": -6.043245792388916,
246
+ "rewards/margins": 2.734940767288208,
247
+ "rewards/rejected": -8.778186798095703,
248
+ "step": 140,
249
+ "use_label": 1442.0250244140625
250
+ },
251
+ {
252
+ "epoch": 0.16,
253
+ "learning_rate": 4.685681024447032e-05,
254
+ "logits/chosen": -2.008868932723999,
255
+ "logits/rejected": -2.024056911468506,
256
+ "logps/chosen": -2590.871337890625,
257
+ "logps/rejected": -2381.74951171875,
258
+ "loss": 0.037,
259
+ "pred_label": 853.2249755859375,
260
+ "rewards/accuracies": 0.512499988079071,
261
+ "rewards/chosen": -229.59854125976562,
262
+ "rewards/margins": -19.598337173461914,
263
+ "rewards/rejected": -210.0001983642578,
264
+ "step": 150,
265
+ "use_label": 1468.7750244140625
266
  },
267
  {
268
  "epoch": 0.17,
269
+ "learning_rate": 4.6274738067520374e-05,
270
+ "logits/chosen": -3.4510104656219482,
271
+ "logits/rejected": -3.4814345836639404,
272
+ "logps/chosen": -5424.06201171875,
273
+ "logps/rejected": -4965.0986328125,
274
+ "loss": 0.0229,
275
+ "pred_label": 1008.5750122070312,
276
+ "rewards/accuracies": 0.4312500059604645,
277
+ "rewards/chosen": -516.1680297851562,
278
+ "rewards/margins": -46.562461853027344,
279
+ "rewards/rejected": -469.6055603027344,
280
+ "step": 160,
281
+ "use_label": 1473.425048828125
282
+ },
283
+ {
284
+ "epoch": 0.18,
285
+ "learning_rate": 4.5692665890570435e-05,
286
+ "logits/chosen": -3.6305947303771973,
287
+ "logits/rejected": -3.6412110328674316,
288
+ "logps/chosen": -5863.26220703125,
289
+ "logps/rejected": -4459.16650390625,
290
+ "loss": 0.0239,
291
+ "pred_label": 1161.25,
292
+ "rewards/accuracies": 0.40625,
293
+ "rewards/chosen": -556.6785888671875,
294
+ "rewards/margins": -135.7264862060547,
295
+ "rewards/rejected": -420.95208740234375,
296
+ "step": 170,
297
+ "use_label": 1480.75
298
  },
299
  {
300
  "epoch": 0.19,
301
+ "learning_rate": 4.511059371362049e-05,
302
+ "logits/chosen": -3.826639175415039,
303
+ "logits/rejected": -3.826951503753662,
304
+ "logps/chosen": -5895.041015625,
305
+ "logps/rejected": -5085.115234375,
306
+ "loss": 0.021,
307
+ "pred_label": 1315.800048828125,
308
+ "rewards/accuracies": 0.4375,
309
+ "rewards/chosen": -563.9112548828125,
310
+ "rewards/margins": -82.46792602539062,
311
+ "rewards/rejected": -481.443359375,
312
+ "step": 180,
313
+ "use_label": 1486.199951171875
314
+ },
315
+ {
316
+ "epoch": 0.2,
317
+ "learning_rate": 4.452852153667055e-05,
318
+ "logits/chosen": -3.8287880420684814,
319
+ "logits/rejected": -3.829810619354248,
320
+ "logps/chosen": -6264.6552734375,
321
+ "logps/rejected": -4964.57666015625,
322
+ "loss": 0.0083,
323
+ "pred_label": 1472.0,
324
+ "rewards/accuracies": 0.3687500059604645,
325
+ "rewards/chosen": -595.7398681640625,
326
+ "rewards/margins": -125.386474609375,
327
+ "rewards/rejected": -470.3534240722656,
328
+ "step": 190,
329
+ "use_label": 1490.0
330
  },
331
  {
332
  "epoch": 0.21,
333
+ "learning_rate": 4.394644935972061e-05,
334
+ "logits/chosen": -3.800830364227295,
335
+ "logits/rejected": -3.8033287525177,
336
+ "logps/chosen": -5603.17431640625,
337
+ "logps/rejected": -5157.21826171875,
338
+ "loss": 0.0242,
339
+ "pred_label": 1629.2249755859375,
340
+ "rewards/accuracies": 0.44999998807907104,
341
+ "rewards/chosen": -534.1845092773438,
342
+ "rewards/margins": -45.28679656982422,
343
+ "rewards/rejected": -488.897705078125,
344
+ "step": 200,
345
+ "use_label": 1492.7750244140625
346
+ },
347
+ {
348
+ "epoch": 0.22,
349
+ "learning_rate": 4.336437718277067e-05,
350
+ "logits/chosen": -3.7285819053649902,
351
+ "logits/rejected": -3.7191810607910156,
352
+ "logps/chosen": -6087.337890625,
353
+ "logps/rejected": -5075.240234375,
354
+ "loss": 0.0165,
355
+ "pred_label": 1786.125,
356
+ "rewards/accuracies": 0.4312500059604645,
357
+ "rewards/chosen": -578.6089477539062,
358
+ "rewards/margins": -96.50392150878906,
359
+ "rewards/rejected": -482.1050720214844,
360
+ "step": 210,
361
+ "use_label": 1495.875
362
  },
363
  {
364
  "epoch": 0.23,
365
+ "learning_rate": 4.278230500582072e-05,
366
+ "logits/chosen": -3.7653274536132812,
367
+ "logits/rejected": -3.7663722038269043,
368
+ "logps/chosen": -5865.328125,
369
+ "logps/rejected": -5630.29248046875,
370
+ "loss": 0.0263,
371
+ "pred_label": 1942.125,
372
+ "rewards/accuracies": 0.48750001192092896,
373
+ "rewards/chosen": -559.0337524414062,
374
+ "rewards/margins": -23.070148468017578,
375
+ "rewards/rejected": -535.9635620117188,
376
+ "step": 220,
377
+ "use_label": 1499.875
378
+ },
379
+ {
380
+ "epoch": 0.24,
381
+ "learning_rate": 4.220023282887078e-05,
382
+ "logits/chosen": -3.8049216270446777,
383
+ "logits/rejected": -3.8088595867156982,
384
+ "logps/chosen": -6366.97509765625,
385
+ "logps/rejected": -5381.87548828125,
386
+ "loss": 0.0175,
387
+ "pred_label": 2098.27490234375,
388
+ "rewards/accuracies": 0.42500001192092896,
389
+ "rewards/chosen": -605.8801879882812,
390
+ "rewards/margins": -94.10356140136719,
391
+ "rewards/rejected": -511.776611328125,
392
+ "step": 230,
393
+ "use_label": 1503.7249755859375
394
  },
395
  {
396
  "epoch": 0.25,
397
+ "learning_rate": 4.161816065192084e-05,
398
+ "logits/chosen": -3.80168080329895,
399
+ "logits/rejected": -3.802356243133545,
400
+ "logps/chosen": -5398.353515625,
401
+ "logps/rejected": -4512.5625,
402
+ "loss": 0.0201,
403
+ "pred_label": 2253.375,
404
+ "rewards/accuracies": 0.3812499940395355,
405
+ "rewards/chosen": -512.3775634765625,
406
+ "rewards/margins": -83.42332458496094,
407
+ "rewards/rejected": -428.95428466796875,
408
+ "step": 240,
409
+ "use_label": 1508.625
410
+ },
411
+ {
412
+ "epoch": 0.26,
413
+ "learning_rate": 4.10360884749709e-05,
414
+ "logits/chosen": -3.815431594848633,
415
+ "logits/rejected": -3.8156495094299316,
416
+ "logps/chosen": -6113.8330078125,
417
+ "logps/rejected": -5319.52783203125,
418
+ "loss": 0.0204,
419
+ "pred_label": 2408.97509765625,
420
+ "rewards/accuracies": 0.44999998807907104,
421
+ "rewards/chosen": -582.3192138671875,
422
+ "rewards/margins": -77.30831146240234,
423
+ "rewards/rejected": -505.01092529296875,
424
+ "step": 250,
425
+ "use_label": 1513.0250244140625
426
  },
427
  {
428
  "epoch": 0.27,
429
+ "learning_rate": 4.045401629802096e-05,
430
+ "logits/chosen": -3.8084158897399902,
431
+ "logits/rejected": -3.8078300952911377,
432
+ "logps/chosen": -5415.3056640625,
433
+ "logps/rejected": -4981.9599609375,
434
+ "loss": 0.0144,
435
+ "pred_label": 2563.925048828125,
436
+ "rewards/accuracies": 0.5062500238418579,
437
+ "rewards/chosen": -516.6696166992188,
438
+ "rewards/margins": -43.502445220947266,
439
+ "rewards/rejected": -473.16717529296875,
440
+ "step": 260,
441
+ "use_label": 1518.074951171875
442
+ },
443
+ {
444
+ "epoch": 0.28,
445
+ "learning_rate": 3.9871944121071014e-05,
446
+ "logits/chosen": -3.8132598400115967,
447
+ "logits/rejected": -3.8127427101135254,
448
+ "logps/chosen": -5882.3447265625,
449
+ "logps/rejected": -5165.20703125,
450
+ "loss": 0.0155,
451
+ "pred_label": 2719.97509765625,
452
+ "rewards/accuracies": 0.4625000059604645,
453
+ "rewards/chosen": -559.0473022460938,
454
+ "rewards/margins": -70.0018310546875,
455
+ "rewards/rejected": -489.0455017089844,
456
+ "step": 270,
457
+ "use_label": 1522.0250244140625
458
  },
459
  {
460
  "epoch": 0.29,
461
+ "learning_rate": 3.928987194412107e-05,
462
+ "logits/chosen": -3.8188316822052,
463
+ "logits/rejected": -3.818444013595581,
464
+ "logps/chosen": -5914.48486328125,
465
+ "logps/rejected": -5317.22021484375,
466
+ "loss": 0.0222,
467
+ "pred_label": 2876.02490234375,
468
+ "rewards/accuracies": 0.4312500059604645,
469
+ "rewards/chosen": -562.0521240234375,
470
+ "rewards/margins": -56.552947998046875,
471
+ "rewards/rejected": -505.4991760253906,
472
+ "step": 280,
473
+ "use_label": 1525.9749755859375
474
+ },
475
+ {
476
+ "epoch": 0.3,
477
+ "learning_rate": 3.870779976717113e-05,
478
+ "logits/chosen": -3.819366931915283,
479
+ "logits/rejected": -3.82012939453125,
480
+ "logps/chosen": -5673.76416015625,
481
+ "logps/rejected": -4572.4462890625,
482
+ "loss": 0.0131,
483
+ "pred_label": 3034.27490234375,
484
+ "rewards/accuracies": 0.40625,
485
+ "rewards/chosen": -538.0841674804688,
486
+ "rewards/margins": -103.86119079589844,
487
+ "rewards/rejected": -434.2230529785156,
488
+ "step": 290,
489
+ "use_label": 1527.7249755859375
490
  },
491
  {
492
  "epoch": 0.31,
493
+ "learning_rate": 3.812572759022119e-05,
494
+ "logits/chosen": -3.801610231399536,
495
+ "logits/rejected": -3.802950382232666,
496
+ "logps/chosen": -5732.44921875,
497
+ "logps/rejected": -4702.1435546875,
498
+ "loss": 0.0155,
499
+ "pred_label": 3192.824951171875,
500
+ "rewards/accuracies": 0.38749998807907104,
501
+ "rewards/chosen": -546.8770751953125,
502
+ "rewards/margins": -100.64713287353516,
503
+ "rewards/rejected": -446.2298889160156,
504
+ "step": 300,
505
+ "use_label": 1529.175048828125
506
+ },
507
+ {
508
+ "epoch": 0.32,
509
+ "learning_rate": 3.7543655413271246e-05,
510
+ "logits/chosen": -3.7929720878601074,
511
+ "logits/rejected": -3.7945361137390137,
512
+ "logps/chosen": -5449.23046875,
513
+ "logps/rejected": -5404.5537109375,
514
+ "loss": 0.0162,
515
+ "pred_label": 3350.675048828125,
516
+ "rewards/accuracies": 0.5249999761581421,
517
+ "rewards/chosen": -518.22998046875,
518
+ "rewards/margins": -6.628878593444824,
519
+ "rewards/rejected": -511.60107421875,
520
+ "step": 310,
521
+ "use_label": 1531.324951171875
522
  },
523
  {
524
  "epoch": 0.33,
525
+ "learning_rate": 3.696158323632131e-05,
526
+ "logits/chosen": -3.804478883743286,
527
+ "logits/rejected": -3.808168411254883,
528
+ "logps/chosen": -6255.1689453125,
529
+ "logps/rejected": -5367.044921875,
530
+ "loss": 0.0127,
531
+ "pred_label": 3508.02490234375,
532
+ "rewards/accuracies": 0.4437499940395355,
533
+ "rewards/chosen": -595.9188842773438,
534
+ "rewards/margins": -87.29522705078125,
535
+ "rewards/rejected": -508.6236267089844,
536
+ "step": 320,
537
+ "use_label": 1533.9749755859375
538
+ },
539
+ {
540
+ "epoch": 0.35,
541
+ "learning_rate": 3.637951105937136e-05,
542
+ "logits/chosen": -3.806224822998047,
543
+ "logits/rejected": -3.809751510620117,
544
+ "logps/chosen": -5673.6767578125,
545
+ "logps/rejected": -4599.72119140625,
546
+ "loss": 0.0221,
547
+ "pred_label": 3666.10009765625,
548
+ "rewards/accuracies": 0.4749999940395355,
549
+ "rewards/chosen": -540.2728881835938,
550
+ "rewards/margins": -103.72891998291016,
551
+ "rewards/rejected": -436.5439453125,
552
+ "step": 330,
553
+ "use_label": 1535.9000244140625
554
  },
555
  {
556
  "epoch": 0.36,
557
+ "learning_rate": 3.579743888242142e-05,
558
+ "logits/chosen": -3.807875871658325,
559
+ "logits/rejected": -3.8099751472473145,
560
+ "logps/chosen": -5879.23486328125,
561
+ "logps/rejected": -4872.8642578125,
562
+ "loss": 0.0213,
563
+ "pred_label": 3819.85009765625,
564
+ "rewards/accuracies": 0.39375001192092896,
565
+ "rewards/chosen": -559.0055541992188,
566
+ "rewards/margins": -95.80415344238281,
567
+ "rewards/rejected": -463.20135498046875,
568
+ "step": 340,
569
+ "use_label": 1542.1500244140625
570
+ },
571
+ {
572
+ "epoch": 0.37,
573
+ "learning_rate": 3.5215366705471484e-05,
574
+ "logits/chosen": -3.8283824920654297,
575
+ "logits/rejected": -3.8290863037109375,
576
+ "logps/chosen": -6421.64453125,
577
+ "logps/rejected": -5712.4833984375,
578
+ "loss": 0.0197,
579
+ "pred_label": 3974.35009765625,
580
+ "rewards/accuracies": 0.375,
581
+ "rewards/chosen": -612.4450073242188,
582
+ "rewards/margins": -68.9627685546875,
583
+ "rewards/rejected": -543.482177734375,
584
+ "step": 350,
585
+ "use_label": 1547.6500244140625
586
  },
587
  {
588
  "epoch": 0.38,
589
+ "learning_rate": 3.463329452852154e-05,
590
+ "logits/chosen": -3.8224472999572754,
591
+ "logits/rejected": -3.822279691696167,
592
+ "logps/chosen": -5800.58251953125,
593
+ "logps/rejected": -5399.095703125,
594
+ "loss": 0.0133,
595
+ "pred_label": 4133.25,
596
+ "rewards/accuracies": 0.42500001192092896,
597
+ "rewards/chosen": -552.7788696289062,
598
+ "rewards/margins": -39.389373779296875,
599
+ "rewards/rejected": -513.3894653320312,
600
+ "step": 360,
601
+ "use_label": 1548.75
602
+ },
603
+ {
604
+ "epoch": 0.39,
605
+ "learning_rate": 3.40512223515716e-05,
606
+ "logits/chosen": -3.8213393688201904,
607
+ "logits/rejected": -3.8208725452423096,
608
+ "logps/chosen": -5875.4296875,
609
+ "logps/rejected": -5105.2080078125,
610
+ "loss": 0.0144,
611
+ "pred_label": 4289.4501953125,
612
+ "rewards/accuracies": 0.4625000059604645,
613
+ "rewards/chosen": -559.922607421875,
614
+ "rewards/margins": -76.97410583496094,
615
+ "rewards/rejected": -482.9485778808594,
616
+ "step": 370,
617
+ "use_label": 1552.550048828125
618
  },
619
  {
620
  "epoch": 0.4,
621
+ "learning_rate": 3.3469150174621654e-05,
622
+ "logits/chosen": -3.786717176437378,
623
+ "logits/rejected": -3.7882437705993652,
624
+ "logps/chosen": -6002.546875,
625
+ "logps/rejected": -5331.99560546875,
626
+ "loss": 0.0231,
627
+ "pred_label": 4444.1748046875,
628
+ "rewards/accuracies": 0.41874998807907104,
629
+ "rewards/chosen": -571.731689453125,
630
+ "rewards/margins": -64.58997344970703,
631
+ "rewards/rejected": -507.1416931152344,
632
+ "step": 380,
633
+ "use_label": 1557.824951171875
634
+ },
635
+ {
636
+ "epoch": 0.41,
637
+ "learning_rate": 3.288707799767171e-05,
638
+ "logits/chosen": -3.6485819816589355,
639
+ "logits/rejected": -3.6548709869384766,
640
+ "logps/chosen": -5633.61083984375,
641
+ "logps/rejected": -4738.9384765625,
642
+ "loss": 0.0232,
643
+ "pred_label": 4600.875,
644
+ "rewards/accuracies": 0.4312500059604645,
645
+ "rewards/chosen": -535.0819091796875,
646
+ "rewards/margins": -85.61624145507812,
647
+ "rewards/rejected": -449.46563720703125,
648
+ "step": 390,
649
+ "use_label": 1561.125
650
  },
651
  {
652
  "epoch": 0.42,
653
+ "learning_rate": 3.2305005820721776e-05,
654
+ "logits/chosen": -3.767920732498169,
655
+ "logits/rejected": -3.767390489578247,
656
+ "logps/chosen": -6094.14697265625,
657
+ "logps/rejected": -5175.2177734375,
658
+ "loss": 0.0231,
659
+ "pred_label": 4752.77490234375,
660
+ "rewards/accuracies": 0.41874998807907104,
661
+ "rewards/chosen": -578.7096557617188,
662
+ "rewards/margins": -87.12177276611328,
663
+ "rewards/rejected": -491.58782958984375,
664
+ "step": 400,
665
+ "use_label": 1569.2249755859375
666
+ },
667
+ {
668
+ "epoch": 0.43,
669
+ "learning_rate": 3.172293364377183e-05,
670
+ "logits/chosen": -3.7445671558380127,
671
+ "logits/rejected": -3.754565715789795,
672
+ "logps/chosen": -6168.5,
673
+ "logps/rejected": -5233.85009765625,
674
+ "loss": 0.0123,
675
+ "pred_label": 4906.02490234375,
676
+ "rewards/accuracies": 0.46875,
677
+ "rewards/chosen": -587.0402221679688,
678
+ "rewards/margins": -89.51008605957031,
679
+ "rewards/rejected": -497.5301818847656,
680
+ "step": 410,
681
+ "use_label": 1575.9749755859375
682
  },
683
  {
684
  "epoch": 0.44,
685
+ "learning_rate": 3.1140861466821885e-05,
686
+ "logits/chosen": -3.766185760498047,
687
+ "logits/rejected": -3.764925003051758,
688
+ "logps/chosen": -4928.68701171875,
689
+ "logps/rejected": -4211.3857421875,
690
+ "loss": 0.0286,
691
+ "pred_label": 5062.2001953125,
692
+ "rewards/accuracies": 0.42500001192092896,
693
+ "rewards/chosen": -468.8106994628906,
694
+ "rewards/margins": -72.5836410522461,
695
+ "rewards/rejected": -396.22705078125,
696
+ "step": 420,
697
+ "use_label": 1579.800048828125
698
+ },
699
+ {
700
+ "epoch": 0.45,
701
+ "learning_rate": 3.055878928987195e-05,
702
+ "logits/chosen": -3.76971173286438,
703
+ "logits/rejected": -3.766024351119995,
704
+ "logps/chosen": -5624.13330078125,
705
+ "logps/rejected": -5330.14599609375,
706
+ "loss": 0.0128,
707
+ "pred_label": 5218.125,
708
+ "rewards/accuracies": 0.48750001192092896,
709
+ "rewards/chosen": -534.6233520507812,
710
+ "rewards/margins": -27.560443878173828,
711
+ "rewards/rejected": -507.06292724609375,
712
+ "step": 430,
713
+ "use_label": 1583.875
714
  },
715
  {
716
  "epoch": 0.46,
717
+ "learning_rate": 2.9976717112922005e-05,
718
+ "logits/chosen": -3.8054771423339844,
719
+ "logits/rejected": -3.8054962158203125,
720
+ "logps/chosen": -5717.0419921875,
721
+ "logps/rejected": -4923.8671875,
722
+ "loss": 0.0159,
723
+ "pred_label": 5373.875,
724
+ "rewards/accuracies": 0.4375,
725
+ "rewards/chosen": -543.377197265625,
726
+ "rewards/margins": -76.2901382446289,
727
+ "rewards/rejected": -467.0870666503906,
728
+ "step": 440,
729
+ "use_label": 1588.125
730
+ },
731
+ {
732
+ "epoch": 0.47,
733
+ "learning_rate": 2.939464493597206e-05,
734
+ "logits/chosen": -3.7968783378601074,
735
+ "logits/rejected": -3.7904553413391113,
736
+ "logps/chosen": -4891.21484375,
737
+ "logps/rejected": -4621.8271484375,
738
+ "loss": 0.0209,
739
+ "pred_label": 5531.77490234375,
740
+ "rewards/accuracies": 0.4124999940395355,
741
+ "rewards/chosen": -466.149658203125,
742
+ "rewards/margins": -26.93206787109375,
743
+ "rewards/rejected": -439.21759033203125,
744
+ "step": 450,
745
+ "use_label": 1590.2249755859375
746
  },
747
  {
748
  "epoch": 0.48,
749
+ "learning_rate": 2.881257275902212e-05,
750
+ "logits/chosen": -3.8137125968933105,
751
+ "logits/rejected": -3.8143749237060547,
752
+ "logps/chosen": -6517.14404296875,
753
+ "logps/rejected": -5308.48095703125,
754
+ "loss": 0.0172,
755
+ "pred_label": 5688.375,
756
+ "rewards/accuracies": 0.39375001192092896,
757
+ "rewards/chosen": -621.5343017578125,
758
+ "rewards/margins": -117.5860595703125,
759
+ "rewards/rejected": -503.9481506347656,
760
+ "step": 460,
761
+ "use_label": 1593.625
762
+ },
763
+ {
764
+ "epoch": 0.49,
765
+ "learning_rate": 2.8230500582072178e-05,
766
+ "logits/chosen": -3.7992587089538574,
767
+ "logits/rejected": -3.799516201019287,
768
+ "logps/chosen": -5745.47314453125,
769
+ "logps/rejected": -5189.96923828125,
770
+ "loss": 0.0155,
771
+ "pred_label": 5845.52490234375,
772
+ "rewards/accuracies": 0.44999998807907104,
773
+ "rewards/chosen": -547.8372802734375,
774
+ "rewards/margins": -54.52460861206055,
775
+ "rewards/rejected": -493.3126525878906,
776
+ "step": 470,
777
+ "use_label": 1596.4749755859375
778
  },
779
  {
780
  "epoch": 0.5,
781
+ "learning_rate": 2.7648428405122233e-05,
782
+ "logits/chosen": -3.761199951171875,
783
+ "logits/rejected": -3.7633252143859863,
784
+ "logps/chosen": -5170.09765625,
785
+ "logps/rejected": -5077.68310546875,
786
+ "loss": 0.0168,
787
+ "pred_label": 6002.375,
788
+ "rewards/accuracies": 0.4625000059604645,
789
+ "rewards/chosen": -492.452392578125,
790
+ "rewards/margins": -9.535995483398438,
791
+ "rewards/rejected": -482.9164123535156,
792
+ "step": 480,
793
+ "use_label": 1599.625
794
+ },
795
+ {
796
+ "epoch": 0.51,
797
+ "learning_rate": 2.7066356228172297e-05,
798
+ "logits/chosen": -3.7587084770202637,
799
+ "logits/rejected": -3.758279323577881,
800
+ "logps/chosen": -5773.9345703125,
801
+ "logps/rejected": -4788.09765625,
802
+ "loss": 0.0171,
803
+ "pred_label": 6158.6748046875,
804
+ "rewards/accuracies": 0.4124999940395355,
805
+ "rewards/chosen": -550.6905517578125,
806
+ "rewards/margins": -95.62019348144531,
807
+ "rewards/rejected": -455.0704040527344,
808
+ "step": 490,
809
+ "use_label": 1603.324951171875
810
  },
811
  {
812
  "epoch": 0.52,
813
+ "learning_rate": 2.6484284051222352e-05,
814
+ "logits/chosen": -3.767758846282959,
815
+ "logits/rejected": -3.7685482501983643,
816
+ "logps/chosen": -6388.5419921875,
817
+ "logps/rejected": -5069.38916015625,
818
+ "loss": 0.0222,
819
+ "pred_label": 6314.52490234375,
820
+ "rewards/accuracies": 0.375,
821
+ "rewards/chosen": -609.9085693359375,
822
+ "rewards/margins": -128.79580688476562,
823
+ "rewards/rejected": -481.1127014160156,
824
+ "step": 500,
825
+ "use_label": 1607.4749755859375
826
+ },
827
+ {
828
+ "epoch": 0.53,
829
+ "learning_rate": 2.590221187427241e-05,
830
+ "logits/chosen": -3.7820258140563965,
831
+ "logits/rejected": -3.784348964691162,
832
+ "logps/chosen": -5971.962890625,
833
+ "logps/rejected": -4760.34912109375,
834
+ "loss": 0.0301,
835
+ "pred_label": 6469.9501953125,
836
+ "rewards/accuracies": 0.42500001192092896,
837
+ "rewards/chosen": -569.66259765625,
838
+ "rewards/margins": -116.97715759277344,
839
+ "rewards/rejected": -452.6853942871094,
840
+ "step": 510,
841
+ "use_label": 1612.050048828125
842
  },
843
  {
844
  "epoch": 0.54,
845
+ "learning_rate": 2.532013969732247e-05,
846
+ "logits/chosen": -3.718219041824341,
847
+ "logits/rejected": -3.72932767868042,
848
+ "logps/chosen": -6069.69580078125,
849
+ "logps/rejected": -5217.16015625,
850
+ "loss": 0.0223,
851
+ "pred_label": 6623.9248046875,
852
+ "rewards/accuracies": 0.41874998807907104,
853
+ "rewards/chosen": -577.7742309570312,
854
+ "rewards/margins": -83.02960205078125,
855
+ "rewards/rejected": -494.74456787109375,
856
+ "step": 520,
857
+ "use_label": 1618.074951171875
858
+ },
859
+ {
860
+ "epoch": 0.55,
861
+ "learning_rate": 2.4738067520372525e-05,
862
+ "logits/chosen": -3.7202675342559814,
863
+ "logits/rejected": -3.7229580879211426,
864
+ "logps/chosen": -6532.5537109375,
865
+ "logps/rejected": -5770.68359375,
866
+ "loss": 0.0095,
867
+ "pred_label": 6780.8251953125,
868
+ "rewards/accuracies": 0.4000000059604645,
869
+ "rewards/chosen": -623.2237548828125,
870
+ "rewards/margins": -75.55280303955078,
871
+ "rewards/rejected": -547.6709594726562,
872
+ "step": 530,
873
+ "use_label": 1621.175048828125
874
  },
875
  {
876
  "epoch": 0.57,
877
+ "learning_rate": 2.4155995343422587e-05,
878
+ "logits/chosen": -3.759662628173828,
879
+ "logits/rejected": -3.7599411010742188,
880
+ "logps/chosen": -6315.06787109375,
881
+ "logps/rejected": -5507.916015625,
882
+ "loss": 0.01,
883
+ "pred_label": 6938.4501953125,
884
+ "rewards/accuracies": 0.41874998807907104,
885
+ "rewards/chosen": -602.653076171875,
886
+ "rewards/margins": -78.45845031738281,
887
+ "rewards/rejected": -524.1947021484375,
888
+ "step": 540,
889
+ "use_label": 1623.550048828125
890
+ },
891
+ {
892
+ "epoch": 0.58,
893
+ "learning_rate": 2.3573923166472644e-05,
894
+ "logits/chosen": -3.738492488861084,
895
+ "logits/rejected": -3.7378597259521484,
896
+ "logps/chosen": -5971.4853515625,
897
+ "logps/rejected": -5198.08935546875,
898
+ "loss": 0.0129,
899
+ "pred_label": 7093.9501953125,
900
+ "rewards/accuracies": 0.4000000059604645,
901
+ "rewards/chosen": -568.2282104492188,
902
+ "rewards/margins": -74.6135025024414,
903
+ "rewards/rejected": -493.61468505859375,
904
+ "step": 550,
905
+ "use_label": 1628.050048828125
906
  },
907
  {
908
  "epoch": 0.59,
909
+ "learning_rate": 2.2991850989522702e-05,
910
+ "logits/chosen": -3.794232130050659,
911
+ "logits/rejected": -3.793727397918701,
912
+ "logps/chosen": -5239.75048828125,
913
+ "logps/rejected": -4281.4697265625,
914
+ "loss": 0.0136,
915
+ "pred_label": 7250.25,
916
+ "rewards/accuracies": 0.41874998807907104,
917
+ "rewards/chosen": -496.53460693359375,
918
+ "rewards/margins": -90.13624572753906,
919
+ "rewards/rejected": -406.3983459472656,
920
+ "step": 560,
921
+ "use_label": 1631.75
922
+ },
923
+ {
924
+ "epoch": 0.6,
925
+ "learning_rate": 2.240977881257276e-05,
926
+ "logits/chosen": -3.7495296001434326,
927
+ "logits/rejected": -3.7504706382751465,
928
+ "logps/chosen": -6018.4404296875,
929
+ "logps/rejected": -5286.20751953125,
930
+ "loss": 0.0237,
931
+ "pred_label": 7407.02490234375,
932
+ "rewards/accuracies": 0.4124999940395355,
933
+ "rewards/chosen": -574.8952026367188,
934
+ "rewards/margins": -72.47772979736328,
935
+ "rewards/rejected": -502.41748046875,
936
+ "step": 570,
937
+ "use_label": 1634.9749755859375
938
  },
939
  {
940
  "epoch": 0.61,
941
+ "learning_rate": 2.1827706635622818e-05,
942
+ "logits/chosen": -3.7940216064453125,
943
+ "logits/rejected": -3.794236421585083,
944
+ "logps/chosen": -5965.88134765625,
945
+ "logps/rejected": -4998.4501953125,
946
+ "loss": 0.0206,
947
+ "pred_label": 7561.5498046875,
948
+ "rewards/accuracies": 0.3812499940395355,
949
+ "rewards/chosen": -568.77734375,
950
+ "rewards/margins": -95.3790054321289,
951
+ "rewards/rejected": -473.3983459472656,
952
+ "step": 580,
953
+ "use_label": 1640.449951171875
954
+ },
955
+ {
956
+ "epoch": 0.62,
957
+ "learning_rate": 2.124563445867288e-05,
958
+ "logits/chosen": -3.725088119506836,
959
+ "logits/rejected": -3.7297370433807373,
960
+ "logps/chosen": -5610.734375,
961
+ "logps/rejected": -5206.3388671875,
962
+ "loss": 0.0217,
963
+ "pred_label": 7717.9248046875,
964
+ "rewards/accuracies": 0.4312500059604645,
965
+ "rewards/chosen": -533.9136962890625,
966
+ "rewards/margins": -41.3577766418457,
967
+ "rewards/rejected": -492.555908203125,
968
+ "step": 590,
969
+ "use_label": 1644.074951171875
970
  },
971
  {
972
  "epoch": 0.63,
973
+ "learning_rate": 2.0663562281722934e-05,
974
+ "logits/chosen": -2.4542346000671387,
975
+ "logits/rejected": -2.457996129989624,
976
+ "logps/chosen": -5316.2861328125,
977
+ "logps/rejected": -4824.51171875,
978
+ "loss": 0.0176,
979
+ "pred_label": 7873.5498046875,
980
+ "rewards/accuracies": 0.4749999940395355,
981
+ "rewards/chosen": -500.2923889160156,
982
+ "rewards/margins": -48.11725616455078,
983
+ "rewards/rejected": -452.1751403808594,
984
+ "step": 600,
985
+ "use_label": 1648.449951171875
986
+ },
987
+ {
988
+ "epoch": 0.64,
989
+ "learning_rate": 2.0081490104772992e-05,
990
+ "logits/chosen": 1.6535043716430664,
991
+ "logits/rejected": 1.6919664144515991,
992
+ "logps/chosen": -4125.20458984375,
993
+ "logps/rejected": -3309.930419921875,
994
+ "loss": 0.019,
995
+ "pred_label": 8029.0,
996
+ "rewards/accuracies": 0.3687500059604645,
997
+ "rewards/chosen": -381.9295349121094,
998
+ "rewards/margins": -74.89913177490234,
999
+ "rewards/rejected": -307.0304260253906,
1000
+ "step": 610,
1001
+ "use_label": 1653.0
1002
  },
1003
  {
1004
  "epoch": 0.65,
1005
+ "learning_rate": 1.9499417927823053e-05,
1006
+ "logits/chosen": 3.7263665199279785,
1007
+ "logits/rejected": 3.714616298675537,
1008
+ "logps/chosen": -5211.14453125,
1009
+ "logps/rejected": -4633.3828125,
1010
+ "loss": 0.0148,
1011
+ "pred_label": 8184.77490234375,
1012
+ "rewards/accuracies": 0.4375,
1013
+ "rewards/chosen": -493.1163024902344,
1014
+ "rewards/margins": -54.99699783325195,
1015
+ "rewards/rejected": -438.11932373046875,
1016
+ "step": 620,
1017
+ "use_label": 1657.2249755859375
1018
+ },
1019
+ {
1020
+ "epoch": 0.66,
1021
+ "learning_rate": 1.8917345750873107e-05,
1022
+ "logits/chosen": 7.3053741455078125,
1023
+ "logits/rejected": 7.303783416748047,
1024
+ "logps/chosen": -7381.1630859375,
1025
+ "logps/rejected": -6444.02734375,
1026
+ "loss": 0.009,
1027
+ "pred_label": 8343.1748046875,
1028
+ "rewards/accuracies": 0.41874998807907104,
1029
+ "rewards/chosen": -709.6043701171875,
1030
+ "rewards/margins": -91.3189697265625,
1031
+ "rewards/rejected": -618.2854614257812,
1032
+ "step": 630,
1033
+ "use_label": 1658.824951171875
1034
  },
1035
  {
1036
  "epoch": 0.67,
1037
+ "learning_rate": 1.833527357392317e-05,
1038
+ "logits/chosen": 8.230302810668945,
1039
+ "logits/rejected": 8.22825813293457,
1040
+ "logps/chosen": -7595.42724609375,
1041
+ "logps/rejected": -7036.8515625,
1042
+ "loss": 0.0111,
1043
+ "pred_label": 8500.5,
1044
+ "rewards/accuracies": 0.4749999940395355,
1045
+ "rewards/chosen": -734.2236328125,
1046
+ "rewards/margins": -55.39581298828125,
1047
+ "rewards/rejected": -678.8277587890625,
1048
+ "step": 640,
1049
+ "use_label": 1661.5
1050
+ },
1051
+ {
1052
+ "epoch": 0.68,
1053
+ "learning_rate": 1.7753201396973227e-05,
1054
+ "logits/chosen": 8.20081901550293,
1055
+ "logits/rejected": 8.195457458496094,
1056
+ "logps/chosen": -9194.9013671875,
1057
+ "logps/rejected": -7898.6552734375,
1058
+ "loss": 0.0088,
1059
+ "pred_label": 8658.349609375,
1060
+ "rewards/accuracies": 0.4437499940395355,
1061
+ "rewards/chosen": -889.2952880859375,
1062
+ "rewards/margins": -127.2280044555664,
1063
+ "rewards/rejected": -762.0673217773438,
1064
+ "step": 650,
1065
+ "use_label": 1663.6500244140625
1066
  },
1067
  {
1068
  "epoch": 0.69,
1069
+ "learning_rate": 1.717112922002328e-05,
1070
+ "logits/chosen": 9.882159233093262,
1071
+ "logits/rejected": 9.892133712768555,
1072
+ "logps/chosen": -10026.548828125,
1073
+ "logps/rejected": -8868.25,
1074
+ "loss": 0.0147,
1075
+ "pred_label": 8817.3251953125,
1076
+ "rewards/accuracies": 0.4312500059604645,
1077
+ "rewards/chosen": -973.1184692382812,
1078
+ "rewards/margins": -111.28104400634766,
1079
+ "rewards/rejected": -861.83740234375,
1080
+ "step": 660,
1081
+ "use_label": 1664.675048828125
1082
+ },
1083
+ {
1084
+ "epoch": 0.7,
1085
+ "learning_rate": 1.6589057043073342e-05,
1086
+ "logits/chosen": 11.399931907653809,
1087
+ "logits/rejected": 11.406278610229492,
1088
+ "logps/chosen": -11008.333984375,
1089
+ "logps/rejected": -9124.1875,
1090
+ "loss": 0.0161,
1091
+ "pred_label": 8974.8251953125,
1092
+ "rewards/accuracies": 0.44999998807907104,
1093
+ "rewards/chosen": -1072.7261962890625,
1094
+ "rewards/margins": -184.44720458984375,
1095
+ "rewards/rejected": -888.2789916992188,
1096
+ "step": 670,
1097
+ "use_label": 1667.175048828125
1098
  },
1099
  {
1100
  "epoch": 0.71,
1101
+ "learning_rate": 1.60069848661234e-05,
1102
+ "logits/chosen": 9.982951164245605,
1103
+ "logits/rejected": 9.928037643432617,
1104
+ "logps/chosen": -10043.669921875,
1105
+ "logps/rejected": -9005.763671875,
1106
+ "loss": 0.0146,
1107
+ "pred_label": 9133.150390625,
1108
+ "rewards/accuracies": 0.4749999940395355,
1109
+ "rewards/chosen": -975.6735229492188,
1110
+ "rewards/margins": -100.92012023925781,
1111
+ "rewards/rejected": -874.75341796875,
1112
+ "step": 680,
1113
+ "use_label": 1668.8499755859375
1114
+ },
1115
+ {
1116
+ "epoch": 0.72,
1117
+ "learning_rate": 1.5424912689173458e-05,
1118
+ "logits/chosen": 3.900209903717041,
1119
+ "logits/rejected": 3.7533345222473145,
1120
+ "logps/chosen": -5247.2783203125,
1121
+ "logps/rejected": -4165.42138671875,
1122
+ "loss": 0.015,
1123
+ "pred_label": 9290.625,
1124
+ "rewards/accuracies": 0.39375001192092896,
1125
+ "rewards/chosen": -499.68951416015625,
1126
+ "rewards/margins": -104.0757827758789,
1127
+ "rewards/rejected": -395.61370849609375,
1128
+ "step": 690,
1129
+ "use_label": 1671.375
1130
  },
1131
  {
1132
  "epoch": 0.73,
1133
+ "learning_rate": 1.4842840512223516e-05,
1134
+ "logits/chosen": 3.9459800720214844,
1135
+ "logits/rejected": 3.741647243499756,
1136
+ "logps/chosen": -6615.76708984375,
1137
+ "logps/rejected": -5040.81982421875,
1138
+ "loss": 0.0158,
1139
+ "pred_label": 9445.275390625,
1140
+ "rewards/accuracies": 0.40625,
1141
+ "rewards/chosen": -634.0076904296875,
1142
+ "rewards/margins": -154.7536163330078,
1143
+ "rewards/rejected": -479.25408935546875,
1144
+ "step": 700,
1145
+ "use_label": 1676.7249755859375
1146
+ },
1147
+ {
1148
+ "epoch": 0.74,
1149
+ "learning_rate": 1.4260768335273575e-05,
1150
+ "logits/chosen": 5.425192832946777,
1151
+ "logits/rejected": 5.073692321777344,
1152
+ "logps/chosen": -8362.833984375,
1153
+ "logps/rejected": -6741.9013671875,
1154
+ "loss": 0.0127,
1155
+ "pred_label": 9602.0498046875,
1156
+ "rewards/accuracies": 0.38749998807907104,
1157
+ "rewards/chosen": -807.3259887695312,
1158
+ "rewards/margins": -157.26266479492188,
1159
+ "rewards/rejected": -650.0633544921875,
1160
+ "step": 710,
1161
+ "use_label": 1679.949951171875
1162
  },
1163
  {
1164
  "epoch": 0.75,
1165
+ "learning_rate": 1.3678696158323633e-05,
1166
+ "logits/chosen": 10.10822582244873,
1167
+ "logits/rejected": 10.002889633178711,
1168
+ "logps/chosen": -10245.421875,
1169
+ "logps/rejected": -9104.876953125,
1170
+ "loss": 0.023,
1171
+ "pred_label": 9759.0751953125,
1172
+ "rewards/accuracies": 0.4749999940395355,
1173
+ "rewards/chosen": -997.3968505859375,
1174
+ "rewards/margins": -112.24949645996094,
1175
+ "rewards/rejected": -885.1474609375,
1176
+ "step": 720,
1177
+ "use_label": 1682.925048828125
1178
+ },
1179
+ {
1180
+ "epoch": 0.76,
1181
+ "learning_rate": 1.309662398137369e-05,
1182
+ "logits/chosen": 10.97143840789795,
1183
+ "logits/rejected": 10.992796897888184,
1184
+ "logps/chosen": -10079.0634765625,
1185
+ "logps/rejected": -8320.7255859375,
1186
+ "loss": 0.0134,
1187
+ "pred_label": 9917.2001953125,
1188
+ "rewards/accuracies": 0.4000000059604645,
1189
+ "rewards/chosen": -978.1613159179688,
1190
+ "rewards/margins": -170.25567626953125,
1191
+ "rewards/rejected": -807.9056396484375,
1192
+ "step": 730,
1193
+ "use_label": 1684.800048828125
1194
  },
1195
  {
1196
  "epoch": 0.77,
1197
+ "learning_rate": 1.2514551804423749e-05,
1198
+ "logits/chosen": 12.233144760131836,
1199
+ "logits/rejected": 12.248846054077148,
1200
+ "logps/chosen": -12818.298828125,
1201
+ "logps/rejected": -11287.875,
1202
+ "loss": 0.0048,
1203
+ "pred_label": 10076.650390625,
1204
+ "rewards/accuracies": 0.45625001192092896,
1205
+ "rewards/chosen": -1251.0927734375,
1206
+ "rewards/margins": -150.9541473388672,
1207
+ "rewards/rejected": -1100.138671875,
1208
+ "step": 740,
1209
+ "use_label": 1685.3499755859375
1210
+ },
1211
+ {
1212
+ "epoch": 0.79,
1213
+ "learning_rate": 1.1932479627473807e-05,
1214
+ "logits/chosen": 12.499679565429688,
1215
+ "logits/rejected": 12.485097885131836,
1216
+ "logps/chosen": -11923.8232421875,
1217
+ "logps/rejected": -10479.5771484375,
1218
+ "loss": 0.0074,
1219
+ "pred_label": 10235.875,
1220
+ "rewards/accuracies": 0.4312500059604645,
1221
+ "rewards/chosen": -1166.3336181640625,
1222
+ "rewards/margins": -144.428466796875,
1223
+ "rewards/rejected": -1021.9050903320312,
1224
+ "step": 750,
1225
+ "use_label": 1686.125
1226
  },
1227
  {
1228
  "epoch": 0.8,
1229
+ "learning_rate": 1.1350407450523866e-05,
1230
+ "logits/chosen": 7.414717197418213,
1231
+ "logits/rejected": 7.40515661239624,
1232
+ "logps/chosen": -9329.333984375,
1233
+ "logps/rejected": -8092.07177734375,
1234
+ "loss": 0.0077,
1235
+ "pred_label": 10393.625,
1236
+ "rewards/accuracies": 0.44999998807907104,
1237
+ "rewards/chosen": -903.3870849609375,
1238
+ "rewards/margins": -120.39128112792969,
1239
+ "rewards/rejected": -782.995849609375,
1240
+ "step": 760,
1241
+ "use_label": 1688.375
1242
+ },
1243
+ {
1244
+ "epoch": 0.81,
1245
+ "learning_rate": 1.0768335273573923e-05,
1246
+ "logits/chosen": 3.0171780586242676,
1247
+ "logits/rejected": 2.9968318939208984,
1248
+ "logps/chosen": -6287.14453125,
1249
+ "logps/rejected": -5580.78515625,
1250
+ "loss": 0.0133,
1251
+ "pred_label": 10549.275390625,
1252
+ "rewards/accuracies": 0.48124998807907104,
1253
+ "rewards/chosen": -598.4849243164062,
1254
+ "rewards/margins": -68.8787841796875,
1255
+ "rewards/rejected": -529.6060791015625,
1256
+ "step": 770,
1257
+ "use_label": 1692.7249755859375
1258
  },
1259
  {
1260
  "epoch": 0.82,
1261
+ "learning_rate": 1.0186263096623982e-05,
1262
+ "logits/chosen": -1.7731034755706787,
1263
+ "logits/rejected": -1.784906029701233,
1264
+ "logps/chosen": -4869.2548828125,
1265
+ "logps/rejected": -4165.048828125,
1266
+ "loss": 0.0135,
1267
+ "pred_label": 10705.75,
1268
+ "rewards/accuracies": 0.42500001192092896,
1269
+ "rewards/chosen": -458.80615234375,
1270
+ "rewards/margins": -67.01484680175781,
1271
+ "rewards/rejected": -391.7913513183594,
1272
+ "step": 780,
1273
+ "use_label": 1696.25
1274
+ },
1275
+ {
1276
+ "epoch": 0.83,
1277
+ "learning_rate": 9.60419091967404e-06,
1278
+ "logits/chosen": -0.7930339574813843,
1279
+ "logits/rejected": -0.8520814180374146,
1280
+ "logps/chosen": -4772.41162109375,
1281
+ "logps/rejected": -4426.7998046875,
1282
+ "loss": 0.0181,
1283
+ "pred_label": 10861.849609375,
1284
+ "rewards/accuracies": 0.42500001192092896,
1285
+ "rewards/chosen": -450.46026611328125,
1286
+ "rewards/margins": -33.853233337402344,
1287
+ "rewards/rejected": -416.60699462890625,
1288
+ "step": 790,
1289
+ "use_label": 1700.1500244140625
1290
  },
1291
  {
1292
  "epoch": 0.84,
1293
+ "learning_rate": 9.022118742724098e-06,
1294
+ "logits/chosen": -2.1026828289031982,
1295
+ "logits/rejected": -2.1392974853515625,
1296
+ "logps/chosen": -5048.7392578125,
1297
+ "logps/rejected": -4407.5849609375,
1298
+ "loss": 0.0168,
1299
+ "pred_label": 11020.3251953125,
1300
+ "rewards/accuracies": 0.4625000059604645,
1301
+ "rewards/chosen": -473.4267578125,
1302
+ "rewards/margins": -60.274497985839844,
1303
+ "rewards/rejected": -413.1521911621094,
1304
+ "step": 800,
1305
+ "use_label": 1701.675048828125
1306
+ },
1307
+ {
1308
+ "epoch": 0.85,
1309
+ "learning_rate": 8.440046565774158e-06,
1310
+ "logits/chosen": -1.4834654331207275,
1311
+ "logits/rejected": -1.5466824769973755,
1312
+ "logps/chosen": -3907.274169921875,
1313
+ "logps/rejected": -3107.385986328125,
1314
+ "loss": 0.014,
1315
+ "pred_label": 11177.2001953125,
1316
+ "rewards/accuracies": 0.3812499940395355,
1317
+ "rewards/chosen": -362.62506103515625,
1318
+ "rewards/margins": -74.76776885986328,
1319
+ "rewards/rejected": -287.8572692871094,
1320
+ "step": 810,
1321
+ "use_label": 1704.800048828125
1322
  },
1323
  {
1324
  "epoch": 0.86,
1325
+ "learning_rate": 7.857974388824214e-06,
1326
+ "logits/chosen": -0.9667215347290039,
1327
+ "logits/rejected": -1.0632926225662231,
1328
+ "logps/chosen": -3960.51708984375,
1329
+ "logps/rejected": -3172.82275390625,
1330
+ "loss": 0.0275,
1331
+ "pred_label": 11334.25,
1332
+ "rewards/accuracies": 0.4000000059604645,
1333
+ "rewards/chosen": -366.71990966796875,
1334
+ "rewards/margins": -73.59380340576172,
1335
+ "rewards/rejected": -293.1261291503906,
1336
+ "step": 820,
1337
+ "use_label": 1707.75
1338
+ },
1339
+ {
1340
+ "epoch": 0.87,
1341
+ "learning_rate": 7.275902211874273e-06,
1342
+ "logits/chosen": 3.487344264984131,
1343
+ "logits/rejected": 3.3718509674072266,
1344
+ "logps/chosen": -5937.5302734375,
1345
+ "logps/rejected": -6147.76416015625,
1346
+ "loss": 0.014,
1347
+ "pred_label": 11490.599609375,
1348
+ "rewards/accuracies": 0.5625,
1349
+ "rewards/chosen": -567.4717407226562,
1350
+ "rewards/margins": 19.284542083740234,
1351
+ "rewards/rejected": -586.7562255859375,
1352
+ "step": 830,
1353
+ "use_label": 1711.4000244140625
1354
  },
1355
  {
1356
  "epoch": 0.88,
1357
+ "learning_rate": 6.693830034924331e-06,
1358
+ "logits/chosen": 11.813470840454102,
1359
+ "logits/rejected": 11.792594909667969,
1360
+ "logps/chosen": -11349.767578125,
1361
+ "logps/rejected": -10712.0576171875,
1362
+ "loss": 0.01,
1363
+ "pred_label": 11647.599609375,
1364
+ "rewards/accuracies": 0.4749999940395355,
1365
+ "rewards/chosen": -1108.175048828125,
1366
+ "rewards/margins": -63.417640686035156,
1367
+ "rewards/rejected": -1044.7574462890625,
1368
+ "step": 840,
1369
+ "use_label": 1714.4000244140625
1370
+ },
1371
+ {
1372
+ "epoch": 0.89,
1373
+ "learning_rate": 6.111757857974389e-06,
1374
+ "logits/chosen": 12.407671928405762,
1375
+ "logits/rejected": 12.412581443786621,
1376
+ "logps/chosen": -12044.3359375,
1377
+ "logps/rejected": -10440.21875,
1378
+ "loss": 0.0137,
1379
+ "pred_label": 11806.0498046875,
1380
+ "rewards/accuracies": 0.41874998807907104,
1381
+ "rewards/chosen": -1174.930908203125,
1382
+ "rewards/margins": -155.24510192871094,
1383
+ "rewards/rejected": -1019.6856689453125,
1384
+ "step": 850,
1385
+ "use_label": 1715.949951171875
1386
  },
1387
  {
1388
  "epoch": 0.9,
1389
+ "learning_rate": 5.529685681024447e-06,
1390
+ "logits/chosen": 13.048141479492188,
1391
+ "logits/rejected": 13.045463562011719,
1392
+ "logps/chosen": -12283.849609375,
1393
+ "logps/rejected": -11249.0595703125,
1394
+ "loss": 0.0112,
1395
+ "pred_label": 11965.0,
1396
+ "rewards/accuracies": 0.4437499940395355,
1397
+ "rewards/chosen": -1199.896728515625,
1398
+ "rewards/margins": -102.23991394042969,
1399
+ "rewards/rejected": -1097.6568603515625,
1400
+ "step": 860,
1401
+ "use_label": 1717.0
1402
+ },
1403
+ {
1404
+ "epoch": 0.91,
1405
+ "learning_rate": 4.947613504074506e-06,
1406
+ "logits/chosen": 13.306634902954102,
1407
+ "logits/rejected": 13.326390266418457,
1408
+ "logps/chosen": -10968.7822265625,
1409
+ "logps/rejected": -10457.6435546875,
1410
+ "loss": 0.0108,
1411
+ "pred_label": 12123.5751953125,
1412
+ "rewards/accuracies": 0.543749988079071,
1413
+ "rewards/chosen": -1072.5794677734375,
1414
+ "rewards/margins": -50.84003448486328,
1415
+ "rewards/rejected": -1021.7394409179688,
1416
+ "step": 870,
1417
+ "use_label": 1718.425048828125
1418
  },
1419
  {
1420
  "epoch": 0.92,
1421
+ "learning_rate": 4.3655413271245635e-06,
1422
+ "logits/chosen": 13.300872802734375,
1423
+ "logits/rejected": 13.316276550292969,
1424
+ "logps/chosen": -13030.8095703125,
1425
+ "logps/rejected": -11216.4794921875,
1426
+ "loss": 0.0078,
1427
+ "pred_label": 12279.9501953125,
1428
+ "rewards/accuracies": 0.4749999940395355,
1429
+ "rewards/chosen": -1274.422119140625,
1430
+ "rewards/margins": -178.9391632080078,
1431
+ "rewards/rejected": -1095.4830322265625,
1432
+ "step": 880,
1433
+ "use_label": 1722.050048828125
1434
+ },
1435
+ {
1436
+ "epoch": 0.93,
1437
+ "learning_rate": 3.7834691501746217e-06,
1438
+ "logits/chosen": 13.323100090026855,
1439
+ "logits/rejected": 13.341893196105957,
1440
+ "logps/chosen": -13646.083984375,
1441
+ "logps/rejected": -12056.134765625,
1442
+ "loss": 0.0108,
1443
+ "pred_label": 12438.625,
1444
+ "rewards/accuracies": 0.4124999940395355,
1445
+ "rewards/chosen": -1336.4798583984375,
1446
+ "rewards/margins": -158.06932067871094,
1447
+ "rewards/rejected": -1178.41064453125,
1448
+ "step": 890,
1449
+ "use_label": 1723.375
1450
  },
1451
  {
1452
  "epoch": 0.94,
1453
+ "learning_rate": 3.2013969732246805e-06,
1454
+ "logits/chosen": 13.762173652648926,
1455
+ "logits/rejected": 13.755559921264648,
1456
+ "logps/chosen": -13121.990234375,
1457
+ "logps/rejected": -10966.107421875,
1458
+ "loss": 0.0193,
1459
+ "pred_label": 12596.1748046875,
1460
+ "rewards/accuracies": 0.45625001192092896,
1461
+ "rewards/chosen": -1284.2850341796875,
1462
+ "rewards/margins": -213.5261688232422,
1463
+ "rewards/rejected": -1070.7589111328125,
1464
+ "step": 900,
1465
+ "use_label": 1725.824951171875
1466
+ },
1467
+ {
1468
+ "epoch": 0.95,
1469
+ "learning_rate": 2.6193247962747383e-06,
1470
+ "logits/chosen": 13.810602188110352,
1471
+ "logits/rejected": 13.802284240722656,
1472
+ "logps/chosen": -13679.8857421875,
1473
+ "logps/rejected": -11569.97265625,
1474
+ "loss": 0.0091,
1475
+ "pred_label": 12752.150390625,
1476
+ "rewards/accuracies": 0.4312500059604645,
1477
+ "rewards/chosen": -1338.3470458984375,
1478
+ "rewards/margins": -207.6631317138672,
1479
+ "rewards/rejected": -1130.6839599609375,
1480
+ "step": 910,
1481
+ "use_label": 1729.8499755859375
1482
  },
1483
  {
1484
  "epoch": 0.96,
1485
+ "learning_rate": 2.037252619324796e-06,
1486
+ "logits/chosen": 13.879419326782227,
1487
+ "logits/rejected": 13.855003356933594,
1488
+ "logps/chosen": -14082.59375,
1489
+ "logps/rejected": -11603.6435546875,
1490
+ "loss": 0.0145,
1491
+ "pred_label": 12909.7998046875,
1492
+ "rewards/accuracies": 0.3687500059604645,
1493
+ "rewards/chosen": -1378.5706787109375,
1494
+ "rewards/margins": -244.1646270751953,
1495
+ "rewards/rejected": -1134.406005859375,
1496
+ "step": 920,
1497
+ "use_label": 1732.199951171875
1498
+ },
1499
+ {
1500
+ "epoch": 0.97,
1501
+ "learning_rate": 1.4551804423748545e-06,
1502
+ "logits/chosen": 13.563482284545898,
1503
+ "logits/rejected": 13.548616409301758,
1504
+ "logps/chosen": -13257.4296875,
1505
+ "logps/rejected": -10279.7294921875,
1506
+ "loss": 0.0103,
1507
+ "pred_label": 13067.3251953125,
1508
+ "rewards/accuracies": 0.45625001192092896,
1509
+ "rewards/chosen": -1298.921630859375,
1510
+ "rewards/margins": -295.1283264160156,
1511
+ "rewards/rejected": -1003.79345703125,
1512
+ "step": 930,
1513
+ "use_label": 1734.675048828125
1514
  },
1515
  {
1516
  "epoch": 0.98,
1517
+ "learning_rate": 8.731082654249127e-07,
1518
+ "logits/chosen": 13.876431465148926,
1519
+ "logits/rejected": 13.86772632598877,
1520
+ "logps/chosen": -14265.5234375,
1521
+ "logps/rejected": -11806.12890625,
1522
+ "loss": 0.0096,
1523
+ "pred_label": 13226.525390625,
1524
+ "rewards/accuracies": 0.3812499940395355,
1525
+ "rewards/chosen": -1397.813232421875,
1526
+ "rewards/margins": -243.88876342773438,
1527
+ "rewards/rejected": -1153.92431640625,
1528
+ "step": 940,
1529
+ "use_label": 1735.4749755859375
1530
+ },
1531
+ {
1532
+ "epoch": 0.99,
1533
+ "learning_rate": 2.910360884749709e-07,
1534
+ "logits/chosen": 13.791536331176758,
1535
+ "logits/rejected": 13.786079406738281,
1536
+ "logps/chosen": -12623.412109375,
1537
+ "logps/rejected": -11098.677734375,
1538
+ "loss": 0.0144,
1539
+ "pred_label": 13384.7998046875,
1540
+ "rewards/accuracies": 0.4749999940395355,
1541
+ "rewards/chosen": -1235.495849609375,
1542
+ "rewards/margins": -150.9226837158203,
1543
+ "rewards/rejected": -1084.572998046875,
1544
+ "step": 950,
1545
+ "use_label": 1737.199951171875
1546
  },
1547
  {
1548
  "epoch": 1.0,
1549
+ "eval_logits/chosen": 13.824411392211914,
1550
+ "eval_logits/rejected": 13.813151359558105,
1551
+ "eval_logps/chosen": -13722.0166015625,
1552
+ "eval_logps/rejected": -11596.5400390625,
1553
+ "eval_loss": 0.011624496430158615,
1554
+ "eval_pred_label": 13789.83984375,
1555
+ "eval_rewards/accuracies": 0.4740000069141388,
1556
+ "eval_rewards/chosen": -1343.776123046875,
1557
+ "eval_rewards/margins": -210.05210876464844,
1558
+ "eval_rewards/rejected": -1133.72412109375,
1559
+ "eval_runtime": 449.9968,
1560
+ "eval_samples_per_second": 4.444,
1561
+ "eval_steps_per_second": 0.278,
1562
+ "eval_use_label": 1742.1600341796875,
1563
+ "step": 955
1564
  },
1565
  {
1566
  "epoch": 1.0,
1567
+ "step": 955,
1568
  "total_flos": 0.0,
1569
+ "train_loss": 0.08065580570807007,
1570
+ "train_runtime": 25025.0638,
1571
+ "train_samples_per_second": 2.443,
1572
+ "train_steps_per_second": 0.038
1573
  }
1574
  ],
1575
  "logging_steps": 10,
1576
+ "max_steps": 955,
1577
  "num_train_epochs": 1,
1578
  "save_steps": 50,
1579
  "total_flos": 0.0,