RikkiXu committed on
Commit
98a5bff
1 Parent(s): 6f3c750

Model save

README.md CHANGED
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
 
  ### Framework versions
 
- - Transformers 4.38.2
+ - Transformers 4.41.1
  - Pytorch 2.1.2+cu118
  - Datasets 2.16.1
- - Tokenizers 0.15.2
+ - Tokenizers 0.19.1
all_results.json CHANGED
@@ -1,8 +1,9 @@
  {
  "epoch": 1.0,
- "train_loss": 0.42979541909757746,
- "train_runtime": 5368.3646,
- "train_samples": 48530,
- "train_samples_per_second": 9.04,
- "train_steps_per_second": 0.035
+ "total_flos": 0.0,
+ "train_loss": 0.4268451908656529,
+ "train_runtime": 5571.7779,
+ "train_samples": 44755,
+ "train_samples_per_second": 8.032,
+ "train_steps_per_second": 0.031
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 32000,
- "transformers_version": "4.38.2"
+ "transformers_version": "4.41.1"
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:356dc2b1355d662c141aef81f6cc89001c178e4007968f89b8978b8150436157
+ oid sha256:ee8452a398b257c4a92a8643d9b1dfe39769d290e7c90c76b86b619836911c45
  size 4943178720
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5f1d3e3ba731f6817b54fbce899547aa3234b1ac6c106bb71917516260d9eb90
+ oid sha256:ee6a898b9636aed352abe6efb776fc9d203259465cf8a6ffff1063f02fa01257
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:68e0cc8151a7384be178ba6186d9b46fe0d7d5bcec31517e8e6d3b801f63aec5
+ oid sha256:f22fcd73a88b87f23b169dac783932266e459f67992280e6211fdc500dbf6a1d
  size 4540532728
runs/Jun05_16-45-33_n136-082-130/events.out.tfevents.1717577207.n136-082-130.1671269.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:45f886db2c2d590c31df3e18a510a66b7c1b3d72d9d53a9ab721ac742739d02a
- size 12583
+ oid sha256:677d4dc19859dd093eed3aeb0022a132de8f6ce1d85c343d2af84f7e925a454c
+ size 17729
train_results.json CHANGED
@@ -1,8 +1,9 @@
  {
  "epoch": 1.0,
- "train_loss": 0.42979541909757746,
- "train_runtime": 5368.3646,
- "train_samples": 48530,
- "train_samples_per_second": 9.04,
- "train_steps_per_second": 0.035
+ "total_flos": 0.0,
+ "train_loss": 0.4268451908656529,
+ "train_runtime": 5571.7779,
+ "train_samples": 44755,
+ "train_samples_per_second": 8.032,
+ "train_steps_per_second": 0.031
  }
trainer_state.json CHANGED
@@ -1,22 +1,22 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.9960474308300395,
+ "epoch": 1.0,
  "eval_steps": 500,
- "global_step": 189,
+ "global_step": 175,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.01,
- "grad_norm": 26.908694644642612,
- "learning_rate": 2.6315789473684208e-08,
- "logits/chosen": -4.638427734375,
- "logits/rejected": -4.891327857971191,
- "logps/chosen": -198.52749633789062,
- "logps/rejected": -147.3392791748047,
- "loss": 0.6929,
+ "epoch": 0.005714285714285714,
+ "grad_norm": 1251.908438964567,
+ "learning_rate": 2.7777777777777774e-08,
+ "logits/chosen": -4.099947929382324,
+ "logits/rejected": -4.528928756713867,
+ "logps/chosen": -297.4884033203125,
+ "logps/rejected": -227.07449340820312,
+ "loss": 0.6931,
  "rewards/accuracies": 0.0,
  "rewards/chosen": 0.0,
  "rewards/margins": 0.0,
@@ -24,290 +24,287 @@
  "step": 1
  },
  {
- "epoch": 0.05,
- "grad_norm": 27.07664091604203,
- "learning_rate": 2.631578947368421e-07,
- "logits/chosen": -4.496448993682861,
- "logits/rejected": -4.815927028656006,
- "logps/chosen": -224.28125,
- "logps/rejected": -167.94735717773438,
- "loss": 0.6915,
+ "epoch": 0.05714285714285714,
+ "grad_norm": 1007.5595895273253,
+ "learning_rate": 2.7777777777777776e-07,
+ "logits/chosen": -4.256350040435791,
+ "logits/rejected": -4.503963947296143,
+ "logps/chosen": -316.07769775390625,
+ "logps/rejected": -254.57467651367188,
+ "loss": 0.6613,
  "rewards/accuracies": 0.5034722089767456,
- "rewards/chosen": 0.0034646072890609503,
- "rewards/margins": 0.002639756305143237,
- "rewards/rejected": 0.0008248506928794086,
+ "rewards/chosen": 0.2096220850944519,
+ "rewards/margins": 0.15642070770263672,
+ "rewards/rejected": 0.0532013401389122,
  "step": 10
  },
  {
- "epoch": 0.11,
- "grad_norm": 26.30402064096193,
- "learning_rate": 4.999573126145131e-07,
- "logits/chosen": -4.625959873199463,
- "logits/rejected": -4.94482421875,
- "logps/chosen": -231.04525756835938,
- "logps/rejected": -196.3661651611328,
- "loss": 0.6465,
- "rewards/accuracies": 0.8125,
- "rewards/chosen": -0.04837086424231529,
- "rewards/margins": 0.11359457671642303,
- "rewards/rejected": -0.16196544468402863,
+ "epoch": 0.11428571428571428,
+ "grad_norm": 675.3841086149566,
+ "learning_rate": 4.997998237821233e-07,
+ "logits/chosen": -4.360010623931885,
+ "logits/rejected": -4.628513813018799,
+ "logps/chosen": -298.9122009277344,
+ "logps/rejected": -249.00918579101562,
+ "loss": 0.4212,
+ "rewards/accuracies": 0.800000011920929,
+ "rewards/chosen": 3.4100475311279297,
+ "rewards/margins": 2.592763900756836,
+ "rewards/rejected": 0.8172838091850281,
  "step": 20
  },
  {
- "epoch": 0.16,
- "grad_norm": 25.70165553073792,
- "learning_rate": 4.948524419003415e-07,
- "logits/chosen": -4.869608402252197,
- "logits/rejected": -5.148451805114746,
- "logps/chosen": -273.7060241699219,
- "logps/rejected": -259.2108154296875,
- "loss": 0.5717,
- "rewards/accuracies": 0.778124988079071,
- "rewards/chosen": -0.5196550488471985,
- "rewards/margins": 0.3445150554180145,
- "rewards/rejected": -0.8641700744628906,
+ "epoch": 0.17142857142857143,
+ "grad_norm": 675.1555265980924,
+ "learning_rate": 4.928272579403969e-07,
+ "logits/chosen": -4.373316287994385,
+ "logits/rejected": -4.6160383224487305,
+ "logps/chosen": -303.8053894042969,
+ "logps/rejected": -266.44818115234375,
+ "loss": 0.4762,
+ "rewards/accuracies": 0.824999988079071,
+ "rewards/chosen": 4.110724449157715,
+ "rewards/margins": 6.506677150726318,
+ "rewards/rejected": -2.3959527015686035,
  "step": 30
  },
  {
- "epoch": 0.21,
- "grad_norm": 32.38040367732233,
- "learning_rate": 4.81409414945389e-07,
- "logits/chosen": -4.95624303817749,
- "logits/rejected": -5.334275245666504,
- "logps/chosen": -321.26739501953125,
- "logps/rejected": -317.9222106933594,
- "loss": 0.5311,
- "rewards/accuracies": 0.793749988079071,
- "rewards/chosen": -0.9283856153488159,
- "rewards/margins": 0.48562851548194885,
- "rewards/rejected": -1.4140141010284424,
+ "epoch": 0.22857142857142856,
+ "grad_norm": 600.7834437052495,
+ "learning_rate": 4.7616414547743854e-07,
+ "logits/chosen": -4.35813045501709,
+ "logits/rejected": -4.55276346206665,
+ "logps/chosen": -289.32257080078125,
+ "logps/rejected": -250.25341796875,
+ "loss": 0.5303,
+ "rewards/accuracies": 0.8343750238418579,
+ "rewards/chosen": 3.9142494201660156,
+ "rewards/margins": 7.811418056488037,
+ "rewards/rejected": -3.897169589996338,
  "step": 40
  },
  {
- "epoch": 0.26,
- "grad_norm": 27.107807886309228,
- "learning_rate": 4.6008601790947314e-07,
- "logits/chosen": -5.323241233825684,
- "logits/rejected": -5.817015171051025,
- "logps/chosen": -357.8787536621094,
- "logps/rejected": -385.47576904296875,
- "loss": 0.4831,
- "rewards/accuracies": 0.8500000238418579,
- "rewards/chosen": -1.411299467086792,
- "rewards/margins": 0.7530988454818726,
- "rewards/rejected": -2.164398193359375,
+ "epoch": 0.2857142857142857,
+ "grad_norm": 788.2365830395779,
+ "learning_rate": 4.5047546391491e-07,
+ "logits/chosen": -4.276906967163086,
+ "logits/rejected": -4.5039567947387695,
+ "logps/chosen": -297.9548034667969,
+ "logps/rejected": -260.8029479980469,
+ "loss": 0.4673,
+ "rewards/accuracies": 0.8531249761581421,
+ "rewards/chosen": 4.0600905418396,
+ "rewards/margins": 7.4909186363220215,
+ "rewards/rejected": -3.430828094482422,
  "step": 50
  },
  {
- "epoch": 0.32,
- "grad_norm": 32.232061879934236,
- "learning_rate": 4.3160839350405605e-07,
- "logits/chosen": -5.831389904022217,
- "logits/rejected": -6.2499542236328125,
- "logps/chosen": -395.7707824707031,
- "logps/rejected": -446.3265686035156,
- "loss": 0.4294,
- "rewards/accuracies": 0.778124988079071,
- "rewards/chosen": -1.821434736251831,
- "rewards/margins": 0.9432821273803711,
- "rewards/rejected": -2.7647171020507812,
+ "epoch": 0.34285714285714286,
+ "grad_norm": 715.3302112367288,
+ "learning_rate": 4.167863756189767e-07,
+ "logits/chosen": -4.322784900665283,
+ "logits/rejected": -4.564073085784912,
+ "logps/chosen": -293.1005554199219,
+ "logps/rejected": -254.21835327148438,
+ "loss": 0.4621,
+ "rewards/accuracies": 0.893750011920929,
+ "rewards/chosen": 5.134177207946777,
+ "rewards/margins": 7.875572204589844,
+ "rewards/rejected": -2.741394519805908,
  "step": 60
  },
  {
- "epoch": 0.37,
- "grad_norm": 35.03072007251475,
- "learning_rate": 3.9694631307311825e-07,
- "logits/chosen": -6.090306758880615,
- "logits/rejected": -6.541258335113525,
- "logps/chosen": -430.2369689941406,
- "logps/rejected": -496.2119140625,
- "loss": 0.424,
- "rewards/accuracies": 0.8187500238418579,
- "rewards/chosen": -2.1488282680511475,
- "rewards/margins": 1.0692826509475708,
- "rewards/rejected": -3.218111038208008,
+ "epoch": 0.4,
+ "grad_norm": 633.4373267135044,
+ "learning_rate": 3.764413164801049e-07,
+ "logits/chosen": -4.282719612121582,
+ "logits/rejected": -4.559357643127441,
+ "logps/chosen": -287.670166015625,
+ "logps/rejected": -240.59359741210938,
+ "loss": 0.3978,
+ "rewards/accuracies": 0.8812500238418579,
+ "rewards/chosen": 4.107884407043457,
+ "rewards/margins": 7.409787654876709,
+ "rewards/rejected": -3.301903247833252,
  "step": 70
  },
  {
- "epoch": 0.42,
- "grad_norm": 30.667469826354093,
- "learning_rate": 3.572801521931522e-07,
- "logits/chosen": -6.3887619972229,
- "logits/rejected": -6.877404689788818,
- "logps/chosen": -439.2911071777344,
- "logps/rejected": -526.5487060546875,
- "loss": 0.4001,
- "rewards/accuracies": 0.815625011920929,
- "rewards/chosen": -2.3115358352661133,
- "rewards/margins": 1.2141239643096924,
- "rewards/rejected": -3.5256600379943848,
+ "epoch": 0.45714285714285713,
+ "grad_norm": 585.2050073161457,
+ "learning_rate": 3.3105034329273217e-07,
+ "logits/chosen": -4.1827239990234375,
+ "logits/rejected": -4.454409599304199,
+ "logps/chosen": -294.9931640625,
+ "logps/rejected": -253.04092407226562,
+ "loss": 0.4435,
+ "rewards/accuracies": 0.828125,
+ "rewards/chosen": 4.6706342697143555,
+ "rewards/margins": 7.3439764976501465,
+ "rewards/rejected": -2.67334246635437,
  "step": 80
  },
  {
- "epoch": 0.47,
- "grad_norm": 37.36819911889553,
- "learning_rate": 3.139606943986089e-07,
- "logits/chosen": -6.5696258544921875,
- "logits/rejected": -7.1035637855529785,
- "logps/chosen": -458.3387756347656,
- "logps/rejected": -556.1650390625,
- "loss": 0.3875,
- "rewards/accuracies": 0.7718750238418579,
- "rewards/chosen": -2.5067451000213623,
- "rewards/margins": 1.3494058847427368,
- "rewards/rejected": -3.8561508655548096,
+ "epoch": 0.5142857142857142,
+ "grad_norm": 744.3685065355755,
+ "learning_rate": 2.8242488095860204e-07,
+ "logits/chosen": -4.200292110443115,
+ "logits/rejected": -4.432915210723877,
+ "logps/chosen": -289.46466064453125,
+ "logps/rejected": -249.84048461914062,
+ "loss": 0.4059,
+ "rewards/accuracies": 0.8656250238418579,
+ "rewards/chosen": 5.276065349578857,
+ "rewards/margins": 7.3948163986206055,
+ "rewards/rejected": -2.118751287460327,
  "step": 90
  },
  {
- "epoch": 0.53,
- "grad_norm": 33.15053822353323,
- "learning_rate": 2.684631318687185e-07,
- "logits/chosen": -6.621747016906738,
- "logits/rejected": -7.236710548400879,
- "logps/chosen": -467.0467834472656,
- "logps/rejected": -582.046142578125,
- "loss": 0.3867,
- "rewards/accuracies": 0.796875,
- "rewards/chosen": -2.4837827682495117,
- "rewards/margins": 1.5418504476547241,
- "rewards/rejected": -4.025633811950684,
+ "epoch": 0.5714285714285714,
+ "grad_norm": 671.6213883992457,
+ "learning_rate": 2.3250543366050071e-07,
+ "logits/chosen": -4.27265739440918,
+ "logits/rejected": -4.471877098083496,
+ "logps/chosen": -299.2139892578125,
+ "logps/rejected": -262.4172668457031,
+ "loss": 0.3587,
+ "rewards/accuracies": 0.8812500238418579,
+ "rewards/chosen": 5.20701265335083,
+ "rewards/margins": 7.200909614562988,
+ "rewards/rejected": -1.9938958883285522,
  "step": 100
  },
  {
- "epoch": 0.58,
- "grad_norm": 37.45830028947681,
- "learning_rate": 2.2233682952712483e-07,
- "logits/chosen": -6.568659782409668,
- "logits/rejected": -7.284300327301025,
- "logps/chosen": -460.4766540527344,
- "logps/rejected": -578.6600341796875,
- "loss": 0.3771,
- "rewards/accuracies": 0.846875011920929,
- "rewards/chosen": -2.3609726428985596,
- "rewards/margins": 1.647943139076233,
- "rewards/rejected": -4.008915901184082,
+ "epoch": 0.6285714285714286,
+ "grad_norm": 731.7704645558294,
+ "learning_rate": 1.8328414484826743e-07,
+ "logits/chosen": -4.198658466339111,
+ "logits/rejected": -4.5151848793029785,
+ "logps/chosen": -299.356689453125,
+ "logps/rejected": -248.8483428955078,
+ "loss": 0.3809,
+ "rewards/accuracies": 0.893750011920929,
+ "rewards/chosen": 5.6889519691467285,
+ "rewards/margins": 7.738437652587891,
+ "rewards/rejected": -2.049485683441162,
  "step": 110
  },
  {
- "epoch": 0.63,
- "grad_norm": 33.8427535333109,
- "learning_rate": 1.7715256327766884e-07,
- "logits/chosen": -6.796021461486816,
- "logits/rejected": -7.497170925140381,
- "logps/chosen": -504.50543212890625,
- "logps/rejected": -621.22314453125,
- "loss": 0.3508,
- "rewards/accuracies": 0.815625011920929,
- "rewards/chosen": -2.883434295654297,
- "rewards/margins": 1.6248239278793335,
- "rewards/rejected": -4.50825834274292,
+ "epoch": 0.6857142857142857,
+ "grad_norm": 763.5659705732334,
+ "learning_rate": 1.3672529644823003e-07,
+ "logits/chosen": -4.348945140838623,
+ "logits/rejected": -4.604073524475098,
+ "logps/chosen": -269.67547607421875,
+ "logps/rejected": -229.8912811279297,
+ "loss": 0.3889,
+ "rewards/accuracies": 0.8656250238418579,
+ "rewards/chosen": 4.305537700653076,
+ "rewards/margins": 6.874751091003418,
+ "rewards/rejected": -2.569213390350342,
  "step": 120
  },
  {
- "epoch": 0.69,
- "grad_norm": 35.353347844932394,
- "learning_rate": 1.3444902911492174e-07,
- "logits/chosen": -6.833544731140137,
- "logits/rejected": -7.472651481628418,
- "logps/chosen": -521.9656372070312,
- "logps/rejected": -659.3110961914062,
- "loss": 0.3705,
- "rewards/accuracies": 0.840624988079071,
- "rewards/chosen": -2.989759922027588,
- "rewards/margins": 1.8119176626205444,
- "rewards/rejected": -4.801677227020264,
+ "epoch": 0.7428571428571429,
+ "grad_norm": 609.1110882142142,
+ "learning_rate": 9.468691994696146e-08,
+ "logits/chosen": -4.341274261474609,
+ "logits/rejected": -4.5989298820495605,
+ "logps/chosen": -275.35833740234375,
+ "logps/rejected": -245.8115692138672,
+ "loss": 0.3699,
+ "rewards/accuracies": 0.871874988079071,
+ "rewards/chosen": 4.084762096405029,
+ "rewards/margins": 6.567566871643066,
+ "rewards/rejected": -2.4828040599823,
  "step": 130
  },
  {
- "epoch": 0.74,
- "grad_norm": 32.6045025544378,
- "learning_rate": 9.56804446775518e-08,
- "logits/chosen": -6.738868713378906,
- "logits/rejected": -7.498864650726318,
- "logps/chosen": -470.77337646484375,
- "logps/rejected": -584.4710083007812,
- "loss": 0.3591,
- "rewards/accuracies": 0.831250011920929,
- "rewards/chosen": -2.558176040649414,
- "rewards/margins": 1.6057535409927368,
- "rewards/rejected": -4.163929462432861,
+ "epoch": 0.8,
+ "grad_norm": 833.6810153426302,
+ "learning_rate": 5.884664762850466e-08,
+ "logits/chosen": -4.367494106292725,
+ "logits/rejected": -4.598031044006348,
+ "logps/chosen": -273.83099365234375,
+ "logps/rejected": -237.6991729736328,
+ "loss": 0.381,
+ "rewards/accuracies": 0.8656250238418579,
+ "rewards/chosen": 4.479451656341553,
+ "rewards/margins": 6.597804069519043,
+ "rewards/rejected": -2.118351697921753,
  "step": 140
  },
  {
- "epoch": 0.79,
- "grad_norm": 32.49183208247093,
- "learning_rate": 6.216702761078166e-08,
- "logits/chosen": -7.049106597900391,
- "logits/rejected": -7.772597312927246,
- "logps/chosen": -487.25726318359375,
- "logps/rejected": -619.6534423828125,
- "loss": 0.3576,
- "rewards/accuracies": 0.8343750238418579,
- "rewards/chosen": -2.82848858833313,
- "rewards/margins": 1.7726719379425049,
- "rewards/rejected": -4.601161003112793,
+ "epoch": 0.8571428571428571,
+ "grad_norm": 677.0337306190108,
+ "learning_rate": 3.063476303172388e-08,
+ "logits/chosen": -4.267864227294922,
+ "logits/rejected": -4.488691806793213,
+ "logps/chosen": -288.9342041015625,
+ "logps/rejected": -247.3463592529297,
+ "loss": 0.392,
+ "rewards/accuracies": 0.8843749761581421,
+ "rewards/chosen": 5.401379585266113,
+ "rewards/margins": 7.644896030426025,
+ "rewards/rejected": -2.2435173988342285,
  "step": 150
  },
  {
- "epoch": 0.84,
- "grad_norm": 37.26747220029015,
- "learning_rate": 3.5050037137906885e-08,
- "logits/chosen": -6.9701337814331055,
- "logits/rejected": -7.731366157531738,
- "logps/chosen": -494.5716247558594,
- "logps/rejected": -623.4630737304688,
- "loss": 0.3502,
- "rewards/accuracies": 0.84375,
- "rewards/chosen": -2.750206470489502,
- "rewards/margins": 1.7980148792266846,
- "rewards/rejected": -4.548220634460449,
+ "epoch": 0.9142857142857143,
+ "grad_norm": 601.5321276048043,
+ "learning_rate": 1.1177122393998372e-08,
+ "logits/chosen": -4.331192970275879,
+ "logits/rejected": -4.530573844909668,
+ "logps/chosen": -277.52020263671875,
+ "logps/rejected": -243.84323120117188,
+ "loss": 0.363,
+ "rewards/accuracies": 0.8687499761581421,
+ "rewards/chosen": 4.819538593292236,
+ "rewards/margins": 6.703360080718994,
+ "rewards/rejected": -1.8838220834732056,
  "step": 160
  },
  {
- "epoch": 0.9,
- "grad_norm": 31.918546112926368,
- "learning_rate": 1.5252909846235894e-08,
- "logits/chosen": -7.007571220397949,
- "logits/rejected": -7.6982011795043945,
- "logps/chosen": -509.54388427734375,
- "logps/rejected": -666.7489624023438,
- "loss": 0.3631,
- "rewards/accuracies": 0.890625,
- "rewards/chosen": -2.9002063274383545,
- "rewards/margins": 2.0059866905212402,
- "rewards/rejected": -4.906193733215332,
+ "epoch": 0.9714285714285714,
+ "grad_norm": 771.9761619985129,
+ "learning_rate": 1.2502249244298879e-09,
+ "logits/chosen": -4.293517112731934,
+ "logits/rejected": -4.548079013824463,
+ "logps/chosen": -295.1907653808594,
+ "logps/rejected": -248.191650390625,
+ "loss": 0.3693,
+ "rewards/accuracies": 0.8843749761581421,
+ "rewards/chosen": 4.456015586853027,
+ "rewards/margins": 6.605706214904785,
+ "rewards/rejected": -2.1496901512145996,
  "step": 170
  },
- {
- "epoch": 0.95,
- "grad_norm": 29.32551345390984,
- "learning_rate": 3.4498131616493565e-09,
- "logits/chosen": -6.939836025238037,
- "logits/rejected": -7.576680660247803,
- "logps/chosen": -514.7128295898438,
- "logps/rejected": -656.9924926757812,
- "loss": 0.3518,
- "rewards/accuracies": 0.8031250238418579,
- "rewards/chosen": -2.932391881942749,
- "rewards/margins": 1.7769733667373657,
- "rewards/rejected": -4.709364891052246,
- "step": 180
- },
  {
  "epoch": 1.0,
- "step": 189,
+ "step": 175,
  "total_flos": 0.0,
- "train_loss": 0.42979541909757746,
- "train_runtime": 5368.3646,
- "train_samples_per_second": 9.04,
- "train_steps_per_second": 0.035
+ "train_loss": 0.4268451908656529,
+ "train_runtime": 5571.7779,
+ "train_samples_per_second": 8.032,
+ "train_steps_per_second": 0.031
  }
  ],
  "logging_steps": 10,
- "max_steps": 189,
+ "max_steps": 175,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,