cappuch committed on
Commit
0ade8fd
1 Parent(s): 053edbf

Upload 9 files

Browse files
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c36a898cd8e2dd7bbc58deb64c721813aba6d5fc055ec40dfabed08d43244f
3
+ size 36072070
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:527e76589de1ce09084875a615d5046549d45eccd68a6a1373f7a1358349cc7c
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbbfa411208c0097956115f6349b400eeedf28dc4897f54f081a71275edc25b8
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "bos_token": "<s>",
31
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "legacy": false,
35
+ "model_max_length": 2048,
36
+ "pad_token": "</s>",
37
+ "padding_side": "right",
38
+ "sp_model_kwargs": {},
39
+ "tokenizer_class": "LlamaTokenizer",
40
+ "unk_token": "<unk>",
41
+ "use_default_system_prompt": false
42
+ }
trainer_state.json ADDED
@@ -0,0 +1,751 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 6.6666666666666675e-06,
14
+ "logits/chosen": -3.1526219844818115,
15
+ "logits/rejected": -3.3119924068450928,
16
+ "logps/chosen": -18.28135108947754,
17
+ "logps/rejected": -33.52398681640625,
18
+ "loss": 0.6997,
19
+ "rewards/accuracies": 0.0,
20
+ "rewards/chosen": -0.006796836853027344,
21
+ "rewards/margins": -0.012901116162538528,
22
+ "rewards/rejected": 0.006104278843849897,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.04,
27
+ "learning_rate": 6.666666666666667e-05,
28
+ "logits/chosen": -3.1073851585388184,
29
+ "logits/rejected": -3.090308666229248,
30
+ "logps/chosen": -20.141502380371094,
31
+ "logps/rejected": -18.037580490112305,
32
+ "loss": 0.694,
33
+ "rewards/accuracies": 0.0833333358168602,
34
+ "rewards/chosen": 0.001263597165234387,
35
+ "rewards/margins": -0.001611270010471344,
36
+ "rewards/rejected": 0.002874867059290409,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.08,
41
+ "learning_rate": 0.00013333333333333334,
42
+ "logits/chosen": -3.0630269050598145,
43
+ "logits/rejected": -3.1416983604431152,
44
+ "logps/chosen": -31.93638038635254,
45
+ "logps/rejected": -42.507789611816406,
46
+ "loss": 0.6916,
47
+ "rewards/accuracies": 0.2750000059604645,
48
+ "rewards/chosen": 0.010843334719538689,
49
+ "rewards/margins": 0.003288193140178919,
50
+ "rewards/rejected": 0.0075551411136984825,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.12,
55
+ "learning_rate": 0.0002,
56
+ "logits/chosen": -3.148637294769287,
57
+ "logits/rejected": -3.150296211242676,
58
+ "logps/chosen": -22.95195770263672,
59
+ "logps/rejected": -23.612133026123047,
60
+ "loss": 0.6974,
61
+ "rewards/accuracies": 0.15000000596046448,
62
+ "rewards/chosen": -0.002520971465855837,
63
+ "rewards/margins": -0.0078018950298428535,
64
+ "rewards/rejected": 0.005280924029648304,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.16,
69
+ "learning_rate": 0.0002666666666666667,
70
+ "logits/chosen": -3.1271812915802,
71
+ "logits/rejected": -3.0903429985046387,
72
+ "logps/chosen": -34.57979965209961,
73
+ "logps/rejected": -27.37040138244629,
74
+ "loss": 0.6925,
75
+ "rewards/accuracies": 0.125,
76
+ "rewards/chosen": 0.03781440109014511,
77
+ "rewards/margins": 0.006826506461948156,
78
+ "rewards/rejected": 0.030987894162535667,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.2,
83
+ "learning_rate": 0.0003333333333333333,
84
+ "logits/chosen": -3.0824079513549805,
85
+ "logits/rejected": -3.0999526977539062,
86
+ "logps/chosen": -29.16314697265625,
87
+ "logps/rejected": -30.843231201171875,
88
+ "loss": 0.7057,
89
+ "rewards/accuracies": 0.25,
90
+ "rewards/chosen": 0.08534860610961914,
91
+ "rewards/margins": -0.013902002945542336,
92
+ "rewards/rejected": 0.09925060719251633,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.24,
97
+ "learning_rate": 0.0004,
98
+ "logits/chosen": -3.1495282649993896,
99
+ "logits/rejected": -3.184638500213623,
100
+ "logps/chosen": -19.72355079650879,
101
+ "logps/rejected": -28.213886260986328,
102
+ "loss": 0.7213,
103
+ "rewards/accuracies": 0.07500000298023224,
104
+ "rewards/chosen": -0.05634387582540512,
105
+ "rewards/margins": -0.043291497975587845,
106
+ "rewards/rejected": -0.01305237878113985,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.28,
111
+ "learning_rate": 0.00046666666666666666,
112
+ "logits/chosen": -3.04826021194458,
113
+ "logits/rejected": -3.005667209625244,
114
+ "logps/chosen": -25.625635147094727,
115
+ "logps/rejected": -24.768199920654297,
116
+ "loss": 0.7002,
117
+ "rewards/accuracies": 0.20000000298023224,
118
+ "rewards/chosen": -0.06301303952932358,
119
+ "rewards/margins": -0.00712633365765214,
120
+ "rewards/rejected": -0.055886708199977875,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.32,
125
+ "learning_rate": 0.0005333333333333334,
126
+ "logits/chosen": -3.048518419265747,
127
+ "logits/rejected": -3.061340808868408,
128
+ "logps/chosen": -32.414894104003906,
129
+ "logps/rejected": -34.773399353027344,
130
+ "loss": 0.7835,
131
+ "rewards/accuracies": 0.32499998807907104,
132
+ "rewards/chosen": -0.23218408226966858,
133
+ "rewards/margins": -0.03052915260195732,
134
+ "rewards/rejected": -0.20165491104125977,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.36,
139
+ "learning_rate": 0.0006,
140
+ "logits/chosen": -3.035808563232422,
141
+ "logits/rejected": -3.1053929328918457,
142
+ "logps/chosen": -31.509021759033203,
143
+ "logps/rejected": -45.353553771972656,
144
+ "loss": 0.7051,
145
+ "rewards/accuracies": 0.30000001192092896,
146
+ "rewards/chosen": -0.3204951286315918,
147
+ "rewards/margins": 0.283873975276947,
148
+ "rewards/rejected": -0.604369044303894,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.4,
153
+ "learning_rate": 0.0006666666666666666,
154
+ "logits/chosen": -2.959228992462158,
155
+ "logits/rejected": -2.98266339302063,
156
+ "logps/chosen": -51.024803161621094,
157
+ "logps/rejected": -54.91625213623047,
158
+ "loss": 1.0868,
159
+ "rewards/accuracies": 0.20000000298023224,
160
+ "rewards/chosen": -1.835883378982544,
161
+ "rewards/margins": 0.3176426589488983,
162
+ "rewards/rejected": -2.1535260677337646,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.44,
167
+ "learning_rate": 0.0007333333333333333,
168
+ "logits/chosen": -3.0634117126464844,
169
+ "logits/rejected": -3.0850846767425537,
170
+ "logps/chosen": -23.597423553466797,
171
+ "logps/rejected": -26.58676528930664,
172
+ "loss": 0.7629,
173
+ "rewards/accuracies": 0.20000000298023224,
174
+ "rewards/chosen": -0.2058214694261551,
175
+ "rewards/margins": -0.05166854336857796,
176
+ "rewards/rejected": -0.15415294468402863,
177
+ "step": 110
178
+ },
179
+ {
180
+ "epoch": 0.48,
181
+ "learning_rate": 0.0008,
182
+ "logits/chosen": -3.1232190132141113,
183
+ "logits/rejected": -3.1285691261291504,
184
+ "logps/chosen": -20.47592544555664,
185
+ "logps/rejected": -22.593311309814453,
186
+ "loss": 0.7341,
187
+ "rewards/accuracies": 0.15000000596046448,
188
+ "rewards/chosen": -0.46454209089279175,
189
+ "rewards/margins": -0.023245975375175476,
190
+ "rewards/rejected": -0.44129619002342224,
191
+ "step": 120
192
+ },
193
+ {
194
+ "epoch": 0.52,
195
+ "learning_rate": 0.0008666666666666667,
196
+ "logits/chosen": -2.975956678390503,
197
+ "logits/rejected": -3.027247190475464,
198
+ "logps/chosen": -34.07396697998047,
199
+ "logps/rejected": -42.46125793457031,
200
+ "loss": 0.723,
201
+ "rewards/accuracies": 0.2750000059604645,
202
+ "rewards/chosen": -0.4881093502044678,
203
+ "rewards/margins": 0.3038038909435272,
204
+ "rewards/rejected": -0.7919132113456726,
205
+ "step": 130
206
+ },
207
+ {
208
+ "epoch": 0.56,
209
+ "learning_rate": 0.0009333333333333333,
210
+ "logits/chosen": -2.981985569000244,
211
+ "logits/rejected": -2.9559457302093506,
212
+ "logps/chosen": -33.598899841308594,
213
+ "logps/rejected": -40.866451263427734,
214
+ "loss": 0.7877,
215
+ "rewards/accuracies": 0.2750000059604645,
216
+ "rewards/chosen": -0.35516494512557983,
217
+ "rewards/margins": 0.4116950035095215,
218
+ "rewards/rejected": -0.7668598890304565,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 0.6,
223
+ "learning_rate": 0.001,
224
+ "logits/chosen": -3.05903959274292,
225
+ "logits/rejected": -3.0611279010772705,
226
+ "logps/chosen": -20.234691619873047,
227
+ "logps/rejected": -19.169904708862305,
228
+ "loss": 0.9163,
229
+ "rewards/accuracies": 0.10000000149011612,
230
+ "rewards/chosen": -0.36899399757385254,
231
+ "rewards/margins": -0.08086968958377838,
232
+ "rewards/rejected": -0.28812432289123535,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 0.64,
237
+ "learning_rate": 0.0009723756906077348,
238
+ "logits/chosen": -2.9612436294555664,
239
+ "logits/rejected": -2.9223554134368896,
240
+ "logps/chosen": -32.5883674621582,
241
+ "logps/rejected": -25.370834350585938,
242
+ "loss": 0.7856,
243
+ "rewards/accuracies": 0.17499999701976776,
244
+ "rewards/chosen": -0.18723489344120026,
245
+ "rewards/margins": -0.10649768263101578,
246
+ "rewards/rejected": -0.08073721826076508,
247
+ "step": 160
248
+ },
249
+ {
250
+ "epoch": 0.68,
251
+ "learning_rate": 0.0009447513812154696,
252
+ "logits/chosen": -2.9623398780822754,
253
+ "logits/rejected": -2.914522171020508,
254
+ "logps/chosen": -39.70682907104492,
255
+ "logps/rejected": -33.20659637451172,
256
+ "loss": 0.898,
257
+ "rewards/accuracies": 0.20000000298023224,
258
+ "rewards/chosen": -0.4028944969177246,
259
+ "rewards/margins": -0.04350559413433075,
260
+ "rewards/rejected": -0.35938888788223267,
261
+ "step": 170
262
+ },
263
+ {
264
+ "epoch": 0.72,
265
+ "learning_rate": 0.0009171270718232044,
266
+ "logits/chosen": -3.0877394676208496,
267
+ "logits/rejected": -3.092603921890259,
268
+ "logps/chosen": -43.16739273071289,
269
+ "logps/rejected": -47.593746185302734,
270
+ "loss": 1.2201,
271
+ "rewards/accuracies": 0.17499999701976776,
272
+ "rewards/chosen": -2.5319814682006836,
273
+ "rewards/margins": -0.10082467645406723,
274
+ "rewards/rejected": -2.431157112121582,
275
+ "step": 180
276
+ },
277
+ {
278
+ "epoch": 0.76,
279
+ "learning_rate": 0.0008895027624309392,
280
+ "logits/chosen": -2.09025239944458,
281
+ "logits/rejected": -2.0903568267822266,
282
+ "logps/chosen": -112.7010269165039,
283
+ "logps/rejected": -105.31596374511719,
284
+ "loss": 2.9619,
285
+ "rewards/accuracies": 0.20000000298023224,
286
+ "rewards/chosen": -7.883659362792969,
287
+ "rewards/margins": -0.7001466751098633,
288
+ "rewards/rejected": -7.1835126876831055,
289
+ "step": 190
290
+ },
291
+ {
292
+ "epoch": 0.8,
293
+ "learning_rate": 0.0008618784530386741,
294
+ "logits/chosen": -1.8135408163070679,
295
+ "logits/rejected": -1.8160464763641357,
296
+ "logps/chosen": -75.82559967041016,
297
+ "logps/rejected": -65.90226745605469,
298
+ "loss": 2.555,
299
+ "rewards/accuracies": 0.17499999701976776,
300
+ "rewards/chosen": -5.0599846839904785,
301
+ "rewards/margins": -0.6209059953689575,
302
+ "rewards/rejected": -4.439078330993652,
303
+ "step": 200
304
+ },
305
+ {
306
+ "epoch": 0.84,
307
+ "learning_rate": 0.0008342541436464089,
308
+ "logits/chosen": -2.6365649700164795,
309
+ "logits/rejected": -2.633017063140869,
310
+ "logps/chosen": -73.8572998046875,
311
+ "logps/rejected": -115.65068054199219,
312
+ "loss": 1.3204,
313
+ "rewards/accuracies": 0.3499999940395355,
314
+ "rewards/chosen": -4.614927291870117,
315
+ "rewards/margins": 3.1844677925109863,
316
+ "rewards/rejected": -7.799394130706787,
317
+ "step": 210
318
+ },
319
+ {
320
+ "epoch": 0.88,
321
+ "learning_rate": 0.0008066298342541437,
322
+ "logits/chosen": -2.338550329208374,
323
+ "logits/rejected": -2.337129831314087,
324
+ "logps/chosen": -75.24410247802734,
325
+ "logps/rejected": -89.40665435791016,
326
+ "loss": 1.6189,
327
+ "rewards/accuracies": 0.20000000298023224,
328
+ "rewards/chosen": -5.084308624267578,
329
+ "rewards/margins": 0.9201302528381348,
330
+ "rewards/rejected": -6.004438877105713,
331
+ "step": 220
332
+ },
333
+ {
334
+ "epoch": 0.92,
335
+ "learning_rate": 0.0007790055248618785,
336
+ "logits/chosen": -2.328997850418091,
337
+ "logits/rejected": -2.326946496963501,
338
+ "logps/chosen": -62.543540954589844,
339
+ "logps/rejected": -84.85903930664062,
340
+ "loss": 1.3971,
341
+ "rewards/accuracies": 0.22499999403953552,
342
+ "rewards/chosen": -3.8174233436584473,
343
+ "rewards/margins": 1.793341875076294,
344
+ "rewards/rejected": -5.610764503479004,
345
+ "step": 230
346
+ },
347
+ {
348
+ "epoch": 0.96,
349
+ "learning_rate": 0.0007513812154696133,
350
+ "logits/chosen": -2.794644832611084,
351
+ "logits/rejected": -2.7910995483398438,
352
+ "logps/chosen": -58.40827560424805,
353
+ "logps/rejected": -79.7098617553711,
354
+ "loss": 1.2398,
355
+ "rewards/accuracies": 0.25,
356
+ "rewards/chosen": -3.556912660598755,
357
+ "rewards/margins": 1.693927526473999,
358
+ "rewards/rejected": -5.250839710235596,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "learning_rate": 0.0007237569060773481,
364
+ "logits/chosen": -2.703537702560425,
365
+ "logits/rejected": -2.7038962841033936,
366
+ "logps/chosen": -40.56044006347656,
367
+ "logps/rejected": -43.81157684326172,
368
+ "loss": 1.3514,
369
+ "rewards/accuracies": 0.10000000149011612,
370
+ "rewards/chosen": -2.4380924701690674,
371
+ "rewards/margins": 0.2092890739440918,
372
+ "rewards/rejected": -2.647381544113159,
373
+ "step": 250
374
+ },
375
+ {
376
+ "epoch": 1.04,
377
+ "learning_rate": 0.0006961325966850829,
378
+ "logits/chosen": -2.870227098464966,
379
+ "logits/rejected": -2.891268253326416,
380
+ "logps/chosen": -67.85215759277344,
381
+ "logps/rejected": -91.36506652832031,
382
+ "loss": 1.2686,
383
+ "rewards/accuracies": 0.2750000059604645,
384
+ "rewards/chosen": -3.6993489265441895,
385
+ "rewards/margins": 1.7562892436981201,
386
+ "rewards/rejected": -5.455638408660889,
387
+ "step": 260
388
+ },
389
+ {
390
+ "epoch": 1.08,
391
+ "learning_rate": 0.0006685082872928176,
392
+ "logits/chosen": -3.1669762134552,
393
+ "logits/rejected": -3.1949028968811035,
394
+ "logps/chosen": -30.239299774169922,
395
+ "logps/rejected": -42.69363021850586,
396
+ "loss": 0.7342,
397
+ "rewards/accuracies": 0.20000000298023224,
398
+ "rewards/chosen": -0.33911675214767456,
399
+ "rewards/margins": 0.5873344540596008,
400
+ "rewards/rejected": -0.9264512062072754,
401
+ "step": 270
402
+ },
403
+ {
404
+ "epoch": 1.12,
405
+ "learning_rate": 0.0006408839779005525,
406
+ "logits/chosen": -3.065441131591797,
407
+ "logits/rejected": -3.0655035972595215,
408
+ "logps/chosen": -39.723567962646484,
409
+ "logps/rejected": -53.760826110839844,
410
+ "loss": 1.3162,
411
+ "rewards/accuracies": 0.15000000596046448,
412
+ "rewards/chosen": -2.2118256092071533,
413
+ "rewards/margins": 0.6879772543907166,
414
+ "rewards/rejected": -2.8998026847839355,
415
+ "step": 280
416
+ },
417
+ {
418
+ "epoch": 1.16,
419
+ "learning_rate": 0.0006132596685082873,
420
+ "logits/chosen": -2.6939263343811035,
421
+ "logits/rejected": -2.6940252780914307,
422
+ "logps/chosen": -60.013755798339844,
423
+ "logps/rejected": -58.038536071777344,
424
+ "loss": 2.1225,
425
+ "rewards/accuracies": 0.20000000298023224,
426
+ "rewards/chosen": -3.602609634399414,
427
+ "rewards/margins": 0.10532107204198837,
428
+ "rewards/rejected": -3.70793080329895,
429
+ "step": 290
430
+ },
431
+ {
432
+ "epoch": 1.2,
433
+ "learning_rate": 0.000585635359116022,
434
+ "logits/chosen": -2.695570468902588,
435
+ "logits/rejected": -2.6920197010040283,
436
+ "logps/chosen": -46.42293930053711,
437
+ "logps/rejected": -44.63296127319336,
438
+ "loss": 0.9542,
439
+ "rewards/accuracies": 0.17499999701976776,
440
+ "rewards/chosen": -2.1268885135650635,
441
+ "rewards/margins": 0.09368989616632462,
442
+ "rewards/rejected": -2.22057843208313,
443
+ "step": 300
444
+ },
445
+ {
446
+ "epoch": 1.24,
447
+ "learning_rate": 0.000558011049723757,
448
+ "logits/chosen": -3.1045467853546143,
449
+ "logits/rejected": -3.1222219467163086,
450
+ "logps/chosen": -22.447757720947266,
451
+ "logps/rejected": -27.05214500427246,
452
+ "loss": 0.7383,
453
+ "rewards/accuracies": 0.15000000596046448,
454
+ "rewards/chosen": -0.1806814968585968,
455
+ "rewards/margins": 0.14409010112285614,
456
+ "rewards/rejected": -0.32477161288261414,
457
+ "step": 310
458
+ },
459
+ {
460
+ "epoch": 1.28,
461
+ "learning_rate": 0.0005303867403314917,
462
+ "logits/chosen": -3.1156842708587646,
463
+ "logits/rejected": -3.130932569503784,
464
+ "logps/chosen": -21.35702133178711,
465
+ "logps/rejected": -20.4589900970459,
466
+ "loss": 0.7337,
467
+ "rewards/accuracies": 0.20000000298023224,
468
+ "rewards/chosen": -0.29580003023147583,
469
+ "rewards/margins": 0.08602263033390045,
470
+ "rewards/rejected": -0.3818226456642151,
471
+ "step": 320
472
+ },
473
+ {
474
+ "epoch": 1.32,
475
+ "learning_rate": 0.0005027624309392266,
476
+ "logits/chosen": -3.050220012664795,
477
+ "logits/rejected": -3.046596050262451,
478
+ "logps/chosen": -24.79575538635254,
479
+ "logps/rejected": -33.02082443237305,
480
+ "loss": 0.6601,
481
+ "rewards/accuracies": 0.25,
482
+ "rewards/chosen": -0.31610769033432007,
483
+ "rewards/margins": 0.4491572976112366,
484
+ "rewards/rejected": -0.7652650475502014,
485
+ "step": 330
486
+ },
487
+ {
488
+ "epoch": 1.36,
489
+ "learning_rate": 0.00047513812154696136,
490
+ "logits/chosen": -3.1225409507751465,
491
+ "logits/rejected": -3.142671823501587,
492
+ "logps/chosen": -34.958404541015625,
493
+ "logps/rejected": -37.874855041503906,
494
+ "loss": 0.6053,
495
+ "rewards/accuracies": 0.30000001192092896,
496
+ "rewards/chosen": -0.23902547359466553,
497
+ "rewards/margins": 0.42017507553100586,
498
+ "rewards/rejected": -0.6592004895210266,
499
+ "step": 340
500
+ },
501
+ {
502
+ "epoch": 1.4,
503
+ "learning_rate": 0.00044751381215469617,
504
+ "logits/chosen": -3.3166356086730957,
505
+ "logits/rejected": -3.271238327026367,
506
+ "logps/chosen": -41.72985076904297,
507
+ "logps/rejected": -43.41400909423828,
508
+ "loss": 0.7479,
509
+ "rewards/accuracies": 0.30000001192092896,
510
+ "rewards/chosen": -0.3358752131462097,
511
+ "rewards/margins": 0.2891044020652771,
512
+ "rewards/rejected": -0.6249796152114868,
513
+ "step": 350
514
+ },
515
+ {
516
+ "epoch": 1.44,
517
+ "learning_rate": 0.0004198895027624309,
518
+ "logits/chosen": -3.2012417316436768,
519
+ "logits/rejected": -3.2214763164520264,
520
+ "logps/chosen": -29.948162078857422,
521
+ "logps/rejected": -34.02383041381836,
522
+ "loss": 0.7083,
523
+ "rewards/accuracies": 0.15000000596046448,
524
+ "rewards/chosen": -0.20552043616771698,
525
+ "rewards/margins": -0.0057243406772613525,
526
+ "rewards/rejected": -0.19979611039161682,
527
+ "step": 360
528
+ },
529
+ {
530
+ "epoch": 1.48,
531
+ "learning_rate": 0.00039226519337016573,
532
+ "logits/chosen": -3.217780590057373,
533
+ "logits/rejected": -3.2134361267089844,
534
+ "logps/chosen": -16.843505859375,
535
+ "logps/rejected": -17.496295928955078,
536
+ "loss": 0.763,
537
+ "rewards/accuracies": 0.10000000149011612,
538
+ "rewards/chosen": -0.05251544713973999,
539
+ "rewards/margins": -0.043300654739141464,
540
+ "rewards/rejected": -0.009214771911501884,
541
+ "step": 370
542
+ },
543
+ {
544
+ "epoch": 1.52,
545
+ "learning_rate": 0.0003646408839779006,
546
+ "logits/chosen": -3.168470859527588,
547
+ "logits/rejected": -3.2095096111297607,
548
+ "logps/chosen": -31.317157745361328,
549
+ "logps/rejected": -40.99824523925781,
550
+ "loss": 0.6099,
551
+ "rewards/accuracies": 0.3499999940395355,
552
+ "rewards/chosen": -0.06307787448167801,
553
+ "rewards/margins": 0.3778603971004486,
554
+ "rewards/rejected": -0.4409382939338684,
555
+ "step": 380
556
+ },
557
+ {
558
+ "epoch": 1.56,
559
+ "learning_rate": 0.0003370165745856354,
560
+ "logits/chosen": -3.1807992458343506,
561
+ "logits/rejected": -3.1729633808135986,
562
+ "logps/chosen": -25.59493064880371,
563
+ "logps/rejected": -24.889175415039062,
564
+ "loss": 0.6768,
565
+ "rewards/accuracies": 0.20000000298023224,
566
+ "rewards/chosen": -0.03841208666563034,
567
+ "rewards/margins": 0.2406325787305832,
568
+ "rewards/rejected": -0.2790446877479553,
569
+ "step": 390
570
+ },
571
+ {
572
+ "epoch": 1.6,
573
+ "learning_rate": 0.00030939226519337016,
574
+ "logits/chosen": -3.1939032077789307,
575
+ "logits/rejected": -3.1782355308532715,
576
+ "logps/chosen": -14.640347480773926,
577
+ "logps/rejected": -13.197868347167969,
578
+ "loss": 0.6587,
579
+ "rewards/accuracies": 0.15000000596046448,
580
+ "rewards/chosen": -0.013051311485469341,
581
+ "rewards/margins": 0.11761553585529327,
582
+ "rewards/rejected": -0.1306668370962143,
583
+ "step": 400
584
+ },
585
+ {
586
+ "epoch": 1.64,
587
+ "learning_rate": 0.00028176795580110497,
588
+ "logits/chosen": -3.2121384143829346,
589
+ "logits/rejected": -3.242738723754883,
590
+ "logps/chosen": -26.492828369140625,
591
+ "logps/rejected": -33.63092041015625,
592
+ "loss": 0.6547,
593
+ "rewards/accuracies": 0.2750000059604645,
594
+ "rewards/chosen": -0.15410800278186798,
595
+ "rewards/margins": 0.2064083367586136,
596
+ "rewards/rejected": -0.36051633954048157,
597
+ "step": 410
598
+ },
599
+ {
600
+ "epoch": 1.68,
601
+ "learning_rate": 0.0002541436464088398,
602
+ "logits/chosen": -3.217862367630005,
603
+ "logits/rejected": -3.2220897674560547,
604
+ "logps/chosen": -45.029319763183594,
605
+ "logps/rejected": -50.58649444580078,
606
+ "loss": 0.7074,
607
+ "rewards/accuracies": 0.30000001192092896,
608
+ "rewards/chosen": -0.21466748416423798,
609
+ "rewards/margins": 0.43003931641578674,
610
+ "rewards/rejected": -0.644706666469574,
611
+ "step": 420
612
+ },
613
+ {
614
+ "epoch": 1.72,
615
+ "learning_rate": 0.0002265193370165746,
616
+ "logits/chosen": -3.2857704162597656,
617
+ "logits/rejected": -3.285273313522339,
618
+ "logps/chosen": -24.04534912109375,
619
+ "logps/rejected": -25.24020767211914,
620
+ "loss": 0.7055,
621
+ "rewards/accuracies": 0.22499999403953552,
622
+ "rewards/chosen": -0.05270111560821533,
623
+ "rewards/margins": 0.19839158654212952,
624
+ "rewards/rejected": -0.25109270215034485,
625
+ "step": 430
626
+ },
627
+ {
628
+ "epoch": 1.76,
629
+ "learning_rate": 0.0001988950276243094,
630
+ "logits/chosen": -3.1307530403137207,
631
+ "logits/rejected": -3.097612142562866,
632
+ "logps/chosen": -25.230710983276367,
633
+ "logps/rejected": -27.142175674438477,
634
+ "loss": 0.6265,
635
+ "rewards/accuracies": 0.2750000059604645,
636
+ "rewards/chosen": 0.06528893858194351,
637
+ "rewards/margins": 0.3360690474510193,
638
+ "rewards/rejected": -0.27078011631965637,
639
+ "step": 440
640
+ },
641
+ {
642
+ "epoch": 1.8,
643
+ "learning_rate": 0.0001712707182320442,
644
+ "logits/chosen": -3.210901975631714,
645
+ "logits/rejected": -3.250978469848633,
646
+ "logps/chosen": -16.752582550048828,
647
+ "logps/rejected": -30.28323745727539,
648
+ "loss": 0.5541,
649
+ "rewards/accuracies": 0.30000001192092896,
650
+ "rewards/chosen": 0.05518122762441635,
651
+ "rewards/margins": 0.479708731174469,
652
+ "rewards/rejected": -0.42452749609947205,
653
+ "step": 450
654
+ },
655
+ {
656
+ "epoch": 1.84,
657
+ "learning_rate": 0.000143646408839779,
658
+ "logits/chosen": -3.048260450363159,
659
+ "logits/rejected": -3.1042888164520264,
660
+ "logps/chosen": -33.8494758605957,
661
+ "logps/rejected": -49.50830841064453,
662
+ "loss": 0.5998,
663
+ "rewards/accuracies": 0.3499999940395355,
664
+ "rewards/chosen": 0.0009274661424569786,
665
+ "rewards/margins": 0.6367529630661011,
666
+ "rewards/rejected": -0.635825514793396,
667
+ "step": 460
668
+ },
669
+ {
670
+ "epoch": 1.88,
671
+ "learning_rate": 0.0001160220994475138,
672
+ "logits/chosen": -3.272062301635742,
673
+ "logits/rejected": -3.2841033935546875,
674
+ "logps/chosen": -16.145034790039062,
675
+ "logps/rejected": -19.171789169311523,
676
+ "loss": 0.6134,
677
+ "rewards/accuracies": 0.22499999403953552,
678
+ "rewards/chosen": 0.10014114528894424,
679
+ "rewards/margins": 0.250404953956604,
680
+ "rewards/rejected": -0.15026383101940155,
681
+ "step": 470
682
+ },
683
+ {
684
+ "epoch": 1.92,
685
+ "learning_rate": 8.839779005524861e-05,
686
+ "logits/chosen": -3.1270499229431152,
687
+ "logits/rejected": -3.1859707832336426,
688
+ "logps/chosen": -28.680988311767578,
689
+ "logps/rejected": -39.859764099121094,
690
+ "loss": 0.6342,
691
+ "rewards/accuracies": 0.32499998807907104,
692
+ "rewards/chosen": -0.053285278379917145,
693
+ "rewards/margins": 0.39891186356544495,
694
+ "rewards/rejected": -0.4521971344947815,
695
+ "step": 480
696
+ },
697
+ {
698
+ "epoch": 1.96,
699
+ "learning_rate": 6.0773480662983424e-05,
700
+ "logits/chosen": -3.140641212463379,
701
+ "logits/rejected": -3.090985059738159,
702
+ "logps/chosen": -32.661651611328125,
703
+ "logps/rejected": -32.10502624511719,
704
+ "loss": 0.7772,
705
+ "rewards/accuracies": 0.20000000298023224,
706
+ "rewards/chosen": -0.3610732853412628,
707
+ "rewards/margins": 0.07261800020933151,
708
+ "rewards/rejected": -0.43369120359420776,
709
+ "step": 490
710
+ },
711
+ {
712
+ "epoch": 2.0,
713
+ "learning_rate": 3.3149171270718233e-05,
714
+ "logits/chosen": -3.1711134910583496,
715
+ "logits/rejected": -3.190059185028076,
716
+ "logps/chosen": -36.2880973815918,
717
+ "logps/rejected": -48.17897415161133,
718
+ "loss": 0.613,
719
+ "rewards/accuracies": 0.375,
720
+ "rewards/chosen": -0.25595012307167053,
721
+ "rewards/margins": 0.6426823139190674,
722
+ "rewards/rejected": -0.8986324071884155,
723
+ "step": 500
724
+ },
725
+ {
726
+ "epoch": 2.0,
727
+ "eval_logits/chosen": -3.225074052810669,
728
+ "eval_logits/rejected": -3.2351696491241455,
729
+ "eval_logps/chosen": -28.331165313720703,
730
+ "eval_logps/rejected": -31.33060073852539,
731
+ "eval_loss": 0.7142000794410706,
732
+ "eval_rewards/accuracies": 0.22200000286102295,
733
+ "eval_rewards/chosen": -0.15271247923374176,
734
+ "eval_rewards/margins": 0.16096290946006775,
735
+ "eval_rewards/rejected": -0.3136754035949707,
736
+ "eval_runtime": 411.5707,
737
+ "eval_samples_per_second": 2.43,
738
+ "eval_steps_per_second": 0.304,
739
+ "step": 500
740
+ }
741
+ ],
742
+ "logging_steps": 10,
743
+ "max_steps": 512,
744
+ "num_input_tokens_seen": 0,
745
+ "num_train_epochs": 3,
746
+ "save_steps": 500,
747
+ "total_flos": 0.0,
748
+ "train_batch_size": 4,
749
+ "trial_name": null,
750
+ "trial_params": null
751
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42c06184f83dd8a6877e02e782c90e9afb80bdbc086da2fed33b592b22abde6d
3
+ size 4664