taicheng committed on
Commit
701b3b8
1 Parent(s): 4c5d498

Model save

Browse files
README.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: alignment-handbook/zephyr-7b-sft-full
5
+ tags:
6
+ - trl
7
+ - dpo
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: zephyr-7b-align-scan-4e-07-0.45-cosine-1.0
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # zephyr-7b-align-scan-4e-07-0.45-cosine-1.0
18
+
19
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unknown dataset.
20
+ It achieves the following results on the evaluation set (from the last evaluation run, at step 200 of 287 training steps):
21
+ - Loss: 0.6494
22
+ - Rewards/chosen: 0.7541
23
+ - Rewards/rejected: 0.2411
24
+ - Rewards/accuracies: 0.3552
25
+ - Rewards/margins: 0.5130
26
+ - Logps/rejected: -80.5926
27
+ - Logps/chosen: -72.8155
28
+ - Logits/rejected: -2.5245
29
+ - Logits/chosen: -2.5411
30
+
31
+ ## Model description
32
+
33
+ More information needed
34
+
35
+ ## Intended uses & limitations
36
+
37
+ More information needed
38
+
39
+ ## Training and evaluation data
40
+
41
+ More information needed
42
+
43
+ ## Training procedure
44
+
45
+ ### Training hyperparameters
46
+
47
+ The following hyperparameters were used during training:
48
+ - learning_rate: 4e-07
49
+ - train_batch_size: 8
50
+ - eval_batch_size: 8
51
+ - seed: 42
52
+ - distributed_type: multi-GPU
53
+ - num_devices: 4
54
+ - gradient_accumulation_steps: 2
55
+ - total_train_batch_size: 64
56
+ - total_eval_batch_size: 32
57
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
58
+ - lr_scheduler_type: cosine
59
+ - lr_scheduler_warmup_ratio: 0.1
60
+ - num_epochs: 1
61
+
62
+ ### Training results
63
+
64
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
65
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
66
+ | 0.6677 | 0.3484 | 100 | 0.6483 | 0.8134 | 0.3763 | 0.3512 | 0.4372 | -80.2922 | -72.6836 | -2.5394 | -2.5556 |
67
+ | 0.6658 | 0.6969 | 200 | 0.6494 | 0.7541 | 0.2411 | 0.3552 | 0.5130 | -80.5926 | -72.8155 | -2.5245 | -2.5411 |
68
+
69
+
70
+ ### Framework versions
71
+
72
+ - Transformers 4.44.2
73
+ - Pytorch 2.4.0
74
+ - Datasets 2.21.0
75
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.6522094729885407,
5
+ "train_runtime": 3372.3295,
6
+ "train_samples": 18340,
7
+ "train_samples_per_second": 5.438,
8
+ "train_steps_per_second": 0.085
9
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.44.2"
6
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.6522094729885407,
5
+ "train_runtime": 3372.3295,
6
+ "train_samples": 18340,
7
+ "train_samples_per_second": 5.438,
8
+ "train_steps_per_second": 0.085
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 100,
6
+ "global_step": 287,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.003484320557491289,
13
+ "grad_norm": 260.1177061514537,
14
+ "learning_rate": 1.3793103448275862e-08,
15
+ "logits/chosen": -2.5345611572265625,
16
+ "logits/rejected": -2.581700563430786,
17
+ "logps/chosen": -60.002105712890625,
18
+ "logps/rejected": -99.98374938964844,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 1
25
+ },
26
+ {
27
+ "epoch": 0.03484320557491289,
28
+ "grad_norm": 238.95036737182184,
29
+ "learning_rate": 1.3793103448275863e-07,
30
+ "logits/chosen": -2.563187837600708,
31
+ "logits/rejected": -2.5619239807128906,
32
+ "logps/chosen": -59.64890670776367,
33
+ "logps/rejected": -73.38821411132812,
34
+ "loss": 0.6926,
35
+ "rewards/accuracies": 0.2430555522441864,
36
+ "rewards/chosen": 0.004840263165533543,
37
+ "rewards/margins": 0.008125737309455872,
38
+ "rewards/rejected": -0.003285474143922329,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.06968641114982578,
43
+ "grad_norm": 287.8433464848162,
44
+ "learning_rate": 2.7586206896551726e-07,
45
+ "logits/chosen": -2.606386184692383,
46
+ "logits/rejected": -2.56528902053833,
47
+ "logps/chosen": -103.9992446899414,
48
+ "logps/rejected": -94.85326385498047,
49
+ "loss": 0.6826,
50
+ "rewards/accuracies": 0.375,
51
+ "rewards/chosen": 0.05751848220825195,
52
+ "rewards/margins": 0.0391354039311409,
53
+ "rewards/rejected": 0.018383082002401352,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.10452961672473868,
58
+ "grad_norm": 303.36666472427896,
59
+ "learning_rate": 3.999851729426529e-07,
60
+ "logits/chosen": -2.5944175720214844,
61
+ "logits/rejected": -2.5745928287506104,
62
+ "logps/chosen": -82.03970336914062,
63
+ "logps/rejected": -91.46243286132812,
64
+ "loss": 0.6629,
65
+ "rewards/accuracies": 0.3499999940395355,
66
+ "rewards/chosen": 0.20558810234069824,
67
+ "rewards/margins": 0.1706041693687439,
68
+ "rewards/rejected": 0.03498392552137375,
69
+ "step": 30
70
+ },
71
+ {
72
+ "epoch": 0.13937282229965156,
73
+ "grad_norm": 243.63595613939415,
74
+ "learning_rate": 3.982085846069669e-07,
75
+ "logits/chosen": -2.4932851791381836,
76
+ "logits/rejected": -2.4914209842681885,
77
+ "logps/chosen": -77.8083267211914,
78
+ "logps/rejected": -73.65955352783203,
79
+ "loss": 0.645,
80
+ "rewards/accuracies": 0.30000001192092896,
81
+ "rewards/chosen": 0.04954088479280472,
82
+ "rewards/margins": 0.3457469344139099,
83
+ "rewards/rejected": -0.2962060570716858,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.17421602787456447,
88
+ "grad_norm": 209.2280603066799,
89
+ "learning_rate": 3.93496739411485e-07,
90
+ "logits/chosen": -2.519728422164917,
91
+ "logits/rejected": -2.5236926078796387,
92
+ "logps/chosen": -63.118324279785156,
93
+ "logps/rejected": -75.77721405029297,
94
+ "loss": 0.6708,
95
+ "rewards/accuracies": 0.26875001192092896,
96
+ "rewards/chosen": 0.3693724274635315,
97
+ "rewards/margins": 0.22256764769554138,
98
+ "rewards/rejected": 0.14680473506450653,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.20905923344947736,
103
+ "grad_norm": 207.31692334790202,
104
+ "learning_rate": 3.859194147372412e-07,
105
+ "logits/chosen": -2.4840266704559326,
106
+ "logits/rejected": -2.477837324142456,
107
+ "logps/chosen": -71.02902221679688,
108
+ "logps/rejected": -66.88030242919922,
109
+ "loss": 0.6573,
110
+ "rewards/accuracies": 0.3375000059604645,
111
+ "rewards/chosen": 0.9798685908317566,
112
+ "rewards/margins": 0.2921520173549652,
113
+ "rewards/rejected": 0.687716543674469,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.24390243902439024,
118
+ "grad_norm": 237.196777331744,
119
+ "learning_rate": 3.7558882264746717e-07,
120
+ "logits/chosen": -2.5007472038269043,
121
+ "logits/rejected": -2.4955484867095947,
122
+ "logps/chosen": -61.67304611206055,
123
+ "logps/rejected": -66.74317169189453,
124
+ "loss": 0.6486,
125
+ "rewards/accuracies": 0.32499998807907104,
126
+ "rewards/chosen": 1.153420090675354,
127
+ "rewards/margins": 0.39587074518203735,
128
+ "rewards/rejected": 0.7575494050979614,
129
+ "step": 70
130
+ },
131
+ {
132
+ "epoch": 0.2787456445993031,
133
+ "grad_norm": 258.03955447258926,
134
+ "learning_rate": 3.6265794814711307e-07,
135
+ "logits/chosen": -2.449399709701538,
136
+ "logits/rejected": -2.439444065093994,
137
+ "logps/chosen": -73.67487335205078,
138
+ "logps/rejected": -76.30928039550781,
139
+ "loss": 0.671,
140
+ "rewards/accuracies": 0.3125,
141
+ "rewards/chosen": 0.8954970240592957,
142
+ "rewards/margins": 0.3529408872127533,
143
+ "rewards/rejected": 0.5425562262535095,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 0.313588850174216,
148
+ "grad_norm": 242.91718492869416,
149
+ "learning_rate": 3.4731828363876876e-07,
150
+ "logits/chosen": -2.4762511253356934,
151
+ "logits/rejected": -2.4902822971343994,
152
+ "logps/chosen": -64.53350830078125,
153
+ "logps/rejected": -69.27278900146484,
154
+ "loss": 0.6857,
155
+ "rewards/accuracies": 0.3062500059604645,
156
+ "rewards/chosen": 0.6519443988800049,
157
+ "rewards/margins": 0.27337905764579773,
158
+ "rewards/rejected": 0.37856537103652954,
159
+ "step": 90
160
+ },
161
+ {
162
+ "epoch": 0.34843205574912894,
163
+ "grad_norm": 261.21801117417044,
164
+ "learning_rate": 3.297969931252667e-07,
165
+ "logits/chosen": -2.466578483581543,
166
+ "logits/rejected": -2.466921329498291,
167
+ "logps/chosen": -73.65306091308594,
168
+ "logps/rejected": -80.45304870605469,
169
+ "loss": 0.6677,
170
+ "rewards/accuracies": 0.3375000059604645,
171
+ "rewards/chosen": 0.7281513214111328,
172
+ "rewards/margins": 0.4587801396846771,
173
+ "rewards/rejected": 0.2693712115287781,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.34843205574912894,
178
+ "eval_logits/chosen": -2.5556371212005615,
179
+ "eval_logits/rejected": -2.539428472518921,
180
+ "eval_logps/chosen": -72.68360900878906,
181
+ "eval_logps/rejected": -80.29220581054688,
182
+ "eval_loss": 0.6482642889022827,
183
+ "eval_rewards/accuracies": 0.3511904776096344,
184
+ "eval_rewards/chosen": 0.8134303092956543,
185
+ "eval_rewards/margins": 0.43716490268707275,
186
+ "eval_rewards/rejected": 0.376265287399292,
187
+ "eval_runtime": 116.9271,
188
+ "eval_samples_per_second": 17.105,
189
+ "eval_steps_per_second": 0.539,
190
+ "step": 100
191
+ },
192
+ {
193
+ "epoch": 0.3832752613240418,
194
+ "grad_norm": 287.07453268461984,
195
+ "learning_rate": 3.103535481540892e-07,
196
+ "logits/chosen": -2.494990587234497,
197
+ "logits/rejected": -2.457904100418091,
198
+ "logps/chosen": -71.97386169433594,
199
+ "logps/rejected": -62.46455764770508,
200
+ "loss": 0.6685,
201
+ "rewards/accuracies": 0.2750000059604645,
202
+ "rewards/chosen": 0.5382742881774902,
203
+ "rewards/margins": 0.29256415367126465,
204
+ "rewards/rejected": 0.24571006000041962,
205
+ "step": 110
206
+ },
207
+ {
208
+ "epoch": 0.4181184668989547,
209
+ "grad_norm": 197.14505664650054,
210
+ "learning_rate": 2.8927588532163986e-07,
211
+ "logits/chosen": -2.5188653469085693,
212
+ "logits/rejected": -2.486799955368042,
213
+ "logps/chosen": -76.14109802246094,
214
+ "logps/rejected": -66.33953857421875,
215
+ "loss": 0.6406,
216
+ "rewards/accuracies": 0.3125,
217
+ "rewards/chosen": 0.5942131876945496,
218
+ "rewards/margins": 0.44479331374168396,
219
+ "rewards/rejected": 0.14941997826099396,
220
+ "step": 120
221
+ },
222
+ {
223
+ "epoch": 0.4529616724738676,
224
+ "grad_norm": 309.7749734748652,
225
+ "learning_rate": 2.6687614224062655e-07,
226
+ "logits/chosen": -2.5562925338745117,
227
+ "logits/rejected": -2.537079095840454,
228
+ "logps/chosen": -82.76345825195312,
229
+ "logps/rejected": -87.87675476074219,
230
+ "loss": 0.661,
231
+ "rewards/accuracies": 0.3812499940395355,
232
+ "rewards/chosen": 0.5027960538864136,
233
+ "rewards/margins": 0.6685007810592651,
234
+ "rewards/rejected": -0.16570481657981873,
235
+ "step": 130
236
+ },
237
+ {
238
+ "epoch": 0.4878048780487805,
239
+ "grad_norm": 207.92836576726873,
240
+ "learning_rate": 2.434860351163114e-07,
241
+ "logits/chosen": -2.4571573734283447,
242
+ "logits/rejected": -2.445655345916748,
243
+ "logps/chosen": -79.63760375976562,
244
+ "logps/rejected": -70.80236053466797,
245
+ "loss": 0.6301,
246
+ "rewards/accuracies": 0.3687500059604645,
247
+ "rewards/chosen": 0.678521454334259,
248
+ "rewards/margins": 0.6956696510314941,
249
+ "rewards/rejected": -0.017148202285170555,
250
+ "step": 140
251
+ },
252
+ {
253
+ "epoch": 0.5226480836236934,
254
+ "grad_norm": 266.02287575831133,
255
+ "learning_rate": 2.194519463847738e-07,
256
+ "logits/chosen": -2.522045612335205,
257
+ "logits/rejected": -2.4781124591827393,
258
+ "logps/chosen": -78.24339294433594,
259
+ "logps/rejected": -79.53880310058594,
260
+ "loss": 0.6665,
261
+ "rewards/accuracies": 0.30000001192092896,
262
+ "rewards/chosen": 0.5584267377853394,
263
+ "rewards/margins": 0.5530349016189575,
264
+ "rewards/rejected": 0.005391845945268869,
265
+ "step": 150
266
+ },
267
+ {
268
+ "epoch": 0.5574912891986062,
269
+ "grad_norm": 250.7616411510584,
270
+ "learning_rate": 1.951297951600021e-07,
271
+ "logits/chosen": -2.4823291301727295,
272
+ "logits/rejected": -2.5029988288879395,
273
+ "logps/chosen": -63.5211067199707,
274
+ "logps/rejected": -71.80608367919922,
275
+ "loss": 0.6696,
276
+ "rewards/accuracies": 0.2750000059604645,
277
+ "rewards/chosen": 0.45168429613113403,
278
+ "rewards/margins": 0.3766103982925415,
279
+ "rewards/rejected": 0.07507390528917313,
280
+ "step": 160
281
+ },
282
+ {
283
+ "epoch": 0.5923344947735192,
284
+ "grad_norm": 247.2037562642855,
285
+ "learning_rate": 1.7087976645299907e-07,
286
+ "logits/chosen": -2.485832691192627,
287
+ "logits/rejected": -2.471374034881592,
288
+ "logps/chosen": -68.3868637084961,
289
+ "logps/rejected": -76.93021392822266,
290
+ "loss": 0.6169,
291
+ "rewards/accuracies": 0.30000001192092896,
292
+ "rewards/chosen": 0.2612365484237671,
293
+ "rewards/margins": 0.5385117530822754,
294
+ "rewards/rejected": -0.2772751450538635,
295
+ "step": 170
296
+ },
297
+ {
298
+ "epoch": 0.627177700348432,
299
+ "grad_norm": 253.43980546820382,
300
+ "learning_rate": 1.4706097721752126e-07,
301
+ "logits/chosen": -2.5348095893859863,
302
+ "logits/rejected": -2.523033618927002,
303
+ "logps/chosen": -91.01544189453125,
304
+ "logps/rejected": -86.55479431152344,
305
+ "loss": 0.6875,
306
+ "rewards/accuracies": 0.33125001192092896,
307
+ "rewards/chosen": 0.37877795100212097,
308
+ "rewards/margins": 0.398730605840683,
309
+ "rewards/rejected": -0.019952651113271713,
310
+ "step": 180
311
+ },
312
+ {
313
+ "epoch": 0.662020905923345,
314
+ "grad_norm": 199.96875567633583,
315
+ "learning_rate": 1.240261582126029e-07,
316
+ "logits/chosen": -2.5323076248168945,
317
+ "logits/rejected": -2.5228934288024902,
318
+ "logps/chosen": -70.24234771728516,
319
+ "logps/rejected": -81.30900573730469,
320
+ "loss": 0.641,
321
+ "rewards/accuracies": 0.3187499940395355,
322
+ "rewards/chosen": 0.36411014199256897,
323
+ "rewards/margins": 0.26548105478286743,
324
+ "rewards/rejected": 0.09862907975912094,
325
+ "step": 190
326
+ },
327
+ {
328
+ "epoch": 0.6968641114982579,
329
+ "grad_norm": 331.81322846783684,
330
+ "learning_rate": 1.0211643043778293e-07,
331
+ "logits/chosen": -2.5657148361206055,
332
+ "logits/rejected": -2.5730998516082764,
333
+ "logps/chosen": -88.71566009521484,
334
+ "logps/rejected": -91.1668930053711,
335
+ "loss": 0.6658,
336
+ "rewards/accuracies": 0.375,
337
+ "rewards/chosen": 0.8733166456222534,
338
+ "rewards/margins": 0.4756090044975281,
339
+ "rewards/rejected": 0.3977075219154358,
340
+ "step": 200
341
+ },
342
+ {
343
+ "epoch": 0.6968641114982579,
344
+ "eval_logits/chosen": -2.541111707687378,
345
+ "eval_logits/rejected": -2.5245211124420166,
346
+ "eval_logps/chosen": -72.81554412841797,
347
+ "eval_logps/rejected": -80.59257507324219,
348
+ "eval_loss": 0.6494360566139221,
349
+ "eval_rewards/accuracies": 0.3551587164402008,
350
+ "eval_rewards/chosen": 0.7540590763092041,
351
+ "eval_rewards/margins": 0.5129595994949341,
352
+ "eval_rewards/rejected": 0.24109944701194763,
353
+ "eval_runtime": 113.9336,
354
+ "eval_samples_per_second": 17.554,
355
+ "eval_steps_per_second": 0.553,
356
+ "step": 200
357
+ },
358
+ {
359
+ "epoch": 0.7317073170731707,
360
+ "grad_norm": 387.8686320796793,
361
+ "learning_rate": 8.165625349643729e-08,
362
+ "logits/chosen": -2.5471763610839844,
363
+ "logits/rejected": -2.5208678245544434,
364
+ "logps/chosen": -67.77056884765625,
365
+ "logps/rejected": -63.1707649230957,
366
+ "loss": 0.6465,
367
+ "rewards/accuracies": 0.36250001192092896,
368
+ "rewards/chosen": 0.6968271136283875,
369
+ "rewards/margins": 0.5916789770126343,
370
+ "rewards/rejected": 0.10514805465936661,
371
+ "step": 210
372
+ },
373
+ {
374
+ "epoch": 0.7665505226480837,
375
+ "grad_norm": 195.53605803419836,
376
+ "learning_rate": 6.294862069654417e-08,
377
+ "logits/chosen": -2.573523759841919,
378
+ "logits/rejected": -2.5524840354919434,
379
+ "logps/chosen": -71.91072082519531,
380
+ "logps/rejected": -70.82460021972656,
381
+ "loss": 0.6589,
382
+ "rewards/accuracies": 0.28125,
383
+ "rewards/chosen": 0.6963558197021484,
384
+ "rewards/margins": 0.3189951777458191,
385
+ "rewards/rejected": 0.37736067175865173,
386
+ "step": 220
387
+ },
388
+ {
389
+ "epoch": 0.8013937282229965,
390
+ "grad_norm": 279.18152746545553,
391
+ "learning_rate": 4.6270572044293563e-08,
392
+ "logits/chosen": -2.5709030628204346,
393
+ "logits/rejected": -2.5481770038604736,
394
+ "logps/chosen": -88.14967346191406,
395
+ "logps/rejected": -88.01075744628906,
396
+ "loss": 0.6489,
397
+ "rewards/accuracies": 0.3687500059604645,
398
+ "rewards/chosen": 0.8329347372055054,
399
+ "rewards/margins": 0.7707937359809875,
400
+ "rewards/rejected": 0.06214110180735588,
401
+ "step": 230
402
+ },
403
+ {
404
+ "epoch": 0.8362369337979094,
405
+ "grad_norm": 218.8441311845723,
406
+ "learning_rate": 3.186909157830124e-08,
407
+ "logits/chosen": -2.563115119934082,
408
+ "logits/rejected": -2.5264599323272705,
409
+ "logps/chosen": -85.11534881591797,
410
+ "logps/rejected": -79.63955688476562,
411
+ "loss": 0.6431,
412
+ "rewards/accuracies": 0.3499999940395355,
413
+ "rewards/chosen": 0.7318143248558044,
414
+ "rewards/margins": 0.5083704590797424,
415
+ "rewards/rejected": 0.223443865776062,
416
+ "step": 240
417
+ },
418
+ {
419
+ "epoch": 0.8710801393728222,
420
+ "grad_norm": 242.6767041893939,
421
+ "learning_rate": 1.9957449800512527e-08,
422
+ "logits/chosen": -2.5801711082458496,
423
+ "logits/rejected": -2.544325113296509,
424
+ "logps/chosen": -93.85467529296875,
425
+ "logps/rejected": -89.73026275634766,
426
+ "loss": 0.5901,
427
+ "rewards/accuracies": 0.40625,
428
+ "rewards/chosen": 0.7175648212432861,
429
+ "rewards/margins": 0.6203989386558533,
430
+ "rewards/rejected": 0.09716588258743286,
431
+ "step": 250
432
+ },
433
+ {
434
+ "epoch": 0.9059233449477352,
435
+ "grad_norm": 197.6846790605052,
436
+ "learning_rate": 1.0712045368478117e-08,
437
+ "logits/chosen": -2.4915573596954346,
438
+ "logits/rejected": -2.50749135017395,
439
+ "logps/chosen": -57.76538848876953,
440
+ "logps/rejected": -65.16616821289062,
441
+ "loss": 0.6473,
442
+ "rewards/accuracies": 0.2874999940395355,
443
+ "rewards/chosen": 0.5777884721755981,
444
+ "rewards/margins": 0.3698353171348572,
445
+ "rewards/rejected": 0.20795314013957977,
446
+ "step": 260
447
+ },
448
+ {
449
+ "epoch": 0.9407665505226481,
450
+ "grad_norm": 269.9944966251555,
451
+ "learning_rate": 4.269792820155782e-09,
452
+ "logits/chosen": -2.588334560394287,
453
+ "logits/rejected": -2.5887222290039062,
454
+ "logps/chosen": -67.41829681396484,
455
+ "logps/rejected": -82.52043151855469,
456
+ "loss": 0.6161,
457
+ "rewards/accuracies": 0.3499999940395355,
458
+ "rewards/chosen": 0.6578438878059387,
459
+ "rewards/margins": 0.6527446508407593,
460
+ "rewards/rejected": 0.005099198315292597,
461
+ "step": 270
462
+ },
463
+ {
464
+ "epoch": 0.975609756097561,
465
+ "grad_norm": 209.58164118406862,
466
+ "learning_rate": 7.260950162363721e-10,
467
+ "logits/chosen": -2.499262809753418,
468
+ "logits/rejected": -2.475175380706787,
469
+ "logps/chosen": -66.43128967285156,
470
+ "logps/rejected": -70.41940307617188,
471
+ "loss": 0.6212,
472
+ "rewards/accuracies": 0.3062500059604645,
473
+ "rewards/chosen": 0.6313899159431458,
474
+ "rewards/margins": 0.44463640451431274,
475
+ "rewards/rejected": 0.18675348162651062,
476
+ "step": 280
477
+ },
478
+ {
479
+ "epoch": 1.0,
480
+ "step": 287,
481
+ "total_flos": 0.0,
482
+ "train_loss": 0.6522094729885407,
483
+ "train_runtime": 3372.3295,
484
+ "train_samples_per_second": 5.438,
485
+ "train_steps_per_second": 0.085
486
+ }
487
+ ],
488
+ "logging_steps": 10,
489
+ "max_steps": 287,
490
+ "num_input_tokens_seen": 0,
491
+ "num_train_epochs": 1,
492
+ "save_steps": 100,
493
+ "stateful_callbacks": {
494
+ "TrainerControl": {
495
+ "args": {
496
+ "should_epoch_stop": false,
497
+ "should_evaluate": false,
498
+ "should_log": false,
499
+ "should_save": true,
500
+ "should_training_stop": true
501
+ },
502
+ "attributes": {}
503
+ }
504
+ },
505
+ "total_flos": 0.0,
506
+ "train_batch_size": 8,
507
+ "trial_name": null,
508
+ "trial_params": null
509
+ }