jbjeong91 committed
Commit 7e208a2
1 Parent(s): b0d9702

Model save

README.md ADDED
@@ -0,0 +1,78 @@
---
library_name: transformers
license: llama3.1
base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
tags:
- trl
- cpo
- generated_from_trainer
model-index:
- name: llama3.1-cpo-full-0913
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# llama3.1-cpo-full-0913

This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on an unknown dataset.
It achieves the following results on the evaluation set (see the note after this list on how the reward values are scaled):
- Loss: 1.5947
- Rewards/chosen: -15.5964
- Rewards/rejected: -16.3155
- Rewards/accuracies: 0.6261
- Rewards/margins: 0.7192
- Logps/rejected: -163.1553
- Logps/chosen: -155.9637
- Logits/rejected: -0.4910
- Logits/chosen: -0.5144
- Nll Loss: 0.4262

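Note on the reward metrics: CPO uses no reference model, so the reward that TRL's `CPOTrainer` reports is the policy log-probability of a response scaled by `beta`. The numbers above appear consistent with the default `beta = 0.1` (for example, `-155.9637 * 0.1 ≈ -15.5964` for the chosen responses), so strongly negative rewards reflect the length and likelihood of the sequences rather than a separately learned score.
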
## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training (a training-setup sketch follows the list):
- learning_rate: 1e-06
- train_batch_size: 4
- eval_batch_size: 4
- seed: 42
- distributed_type: multi-GPU
- num_devices: 4
- gradient_accumulation_steps: 8
- total_train_batch_size: 128
- total_eval_batch_size: 16
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- lr_scheduler_warmup_ratio: 0.1
- num_epochs: 1

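The list above maps onto TRL's CPO API roughly as follows. This is a minimal sketch, not the original training script: the preference-dataset id and `output_dir` are placeholders, mixed precision is an assumption, and depending on the TRL version the tokenizer is passed as `tokenizer=` or `processing_class=`.

```python
# Sketch of a CPO run with the hyperparameters listed above
# (per-device batch 4, grad accumulation 8 on 4 GPUs -> effective batch 128).
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import CPOConfig, CPOTrainer

base = "meta-llama/Meta-Llama-3.1-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(base)
tokenizer = AutoTokenizer.from_pretrained(base)

# Preference data with "prompt", "chosen", "rejected" columns (placeholder id).
train_dataset = load_dataset("your-org/your-preference-dataset", split="train")

args = CPOConfig(
    output_dir="llama3.1-cpo-full-0913",
    learning_rate=1e-6,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=8,
    num_train_epochs=1,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    seed=42,
    bf16=True,  # assumption; precision is not stated in the card
    # beta is not set here; the reward scale above matches the default of 0.1
)

trainer = CPOTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,  # processing_class=tokenizer on newer TRL versions
)
trainer.train()
```
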
### Training results

| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss |
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|
| 1.9304 | 0.2311 | 100 | 1.7873 | -14.9945 | -15.3576 | 0.5804 | 0.3632 | -153.5762 | -149.9445 | -0.3649 | -0.3854 | 0.4085 |
| 1.6908 | 0.4623 | 200 | 1.6702 | -15.6437 | -16.2439 | 0.5978 | 0.6002 | -162.4385 | -156.4369 | -0.3777 | -0.4014 | 0.4252 |
| 1.6317 | 0.6934 | 300 | 1.6162 | -15.4682 | -16.1519 | 0.6152 | 0.6837 | -161.5185 | -154.6818 | -0.4753 | -0.4948 | 0.4202 |
| 1.62 | 0.9246 | 400 | 1.5947 | -15.5964 | -16.3155 | 0.6261 | 0.7192 | -163.1553 | -155.9637 | -0.4910 | -0.5144 | 0.4262 |


### Framework versions

- Transformers 4.44.2
- Pytorch 2.3.1
- Datasets 2.21.0
- Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
{
    "epoch": 0.9985553308292401,
    "total_flos": 0.0,
    "train_loss": 1.7731637126869626,
    "train_runtime": 10231.9294,
    "train_samples": 55376,
    "train_samples_per_second": 5.412,
    "train_steps_per_second": 0.042
}
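These summary numbers are internally consistent with the effective batch size of 128 from the README; a quick sanity check in plain Python, using only values copied from the file above:

```python
# Sanity-check the relationships between the training summary numbers above.
train_samples = 55376
total_train_batch_size = 128    # 4 GPUs x per-device batch 4 x grad accumulation 8
train_runtime = 10231.9294      # seconds

steps = train_samples // total_train_batch_size
print(steps)                                              # 432 optimizer steps
print(steps / (train_samples / total_train_batch_size))   # ~0.9986, the reported "epoch"
print(train_samples / train_runtime)                      # ~5.41 samples/sec (reported 5.412)
print(steps / train_runtime)                              # ~0.042 steps/sec
```
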
generation_config.json ADDED
@@ -0,0 +1,12 @@
{
    "bos_token_id": 128000,
    "do_sample": true,
    "eos_token_id": [
        128001,
        128008,
        128009
    ],
    "temperature": 0.6,
    "top_p": 0.9,
    "transformers_version": "4.44.2"
}
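These sampling defaults are picked up automatically by `generate()`. A minimal inference sketch, assuming the checkpoint is published under the repo id `jbjeong91/llama3.1-cpo-full-0913` (inferred from the model name; adjust if it differs):

```python
# Minimal chat-style inference sketch; sampling settings mirror generation_config.json.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "jbjeong91/llama3.1-cpo-full-0913"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.bfloat16, device_map="auto")

messages = [{"role": "user", "content": "Summarize what CPO fine-tuning changes about a model."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# do_sample, temperature=0.6, top_p=0.9, and the eos ids come from generation_config.json
# by default; they are repeated here only to make the sketch self-contained.
output = model.generate(input_ids, max_new_tokens=256, do_sample=True, temperature=0.6, top_p=0.9)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```
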
train_results.json ADDED
@@ -0,0 +1,9 @@
{
    "epoch": 0.9985553308292401,
    "total_flos": 0.0,
    "train_loss": 1.7731637126869626,
    "train_runtime": 10231.9294,
    "train_samples": 55376,
    "train_samples_per_second": 5.412,
    "train_steps_per_second": 0.042
}
trainer_state.json ADDED
@@ -0,0 +1,798 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9985553308292401,
5
+ "eval_steps": 100,
6
+ "global_step": 432,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.023114706732158336,
13
+ "grad_norm": 66.92803955078125,
14
+ "learning_rate": 2.2727272727272726e-07,
15
+ "logits/chosen": -0.33564168214797974,
16
+ "logits/rejected": -0.3153206706047058,
17
+ "logps/chosen": -269.33428955078125,
18
+ "logps/rejected": -267.60894775390625,
19
+ "loss": 2.6157,
20
+ "nll_loss": 0.741317629814148,
21
+ "rewards/accuracies": 0.48750001192092896,
22
+ "rewards/chosen": -26.933429718017578,
23
+ "rewards/margins": -0.17253029346466064,
24
+ "rewards/rejected": -26.76089859008789,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 0.04622941346431667,
29
+ "grad_norm": 54.904842376708984,
30
+ "learning_rate": 4.545454545454545e-07,
31
+ "logits/chosen": -0.3472834527492523,
32
+ "logits/rejected": -0.3292314112186432,
33
+ "logps/chosen": -260.78680419921875,
34
+ "logps/rejected": -267.32977294921875,
35
+ "loss": 2.5223,
36
+ "nll_loss": 0.7186762094497681,
37
+ "rewards/accuracies": 0.565625011920929,
38
+ "rewards/chosen": -26.078683853149414,
39
+ "rewards/margins": 0.6542952060699463,
40
+ "rewards/rejected": -26.732980728149414,
41
+ "step": 20
42
+ },
43
+ {
44
+ "epoch": 0.06934412019647501,
45
+ "grad_norm": 57.42607498168945,
46
+ "learning_rate": 6.818181818181817e-07,
47
+ "logits/chosen": -0.3462437689304352,
48
+ "logits/rejected": -0.334714412689209,
49
+ "logps/chosen": -247.49801635742188,
50
+ "logps/rejected": -250.79483032226562,
51
+ "loss": 2.3549,
52
+ "nll_loss": 0.7035976052284241,
53
+ "rewards/accuracies": 0.515625,
54
+ "rewards/chosen": -24.749801635742188,
55
+ "rewards/margins": 0.3296825885772705,
56
+ "rewards/rejected": -25.079483032226562,
57
+ "step": 30
58
+ },
59
+ {
60
+ "epoch": 0.09245882692863334,
61
+ "grad_norm": 46.98875427246094,
62
+ "learning_rate": 9.09090909090909e-07,
63
+ "logits/chosen": -0.5729629993438721,
64
+ "logits/rejected": -0.5595733523368835,
65
+ "logps/chosen": -215.0082244873047,
66
+ "logps/rejected": -216.9280548095703,
67
+ "loss": 2.1725,
68
+ "nll_loss": 0.6498099565505981,
69
+ "rewards/accuracies": 0.518750011920929,
70
+ "rewards/chosen": -21.500822067260742,
71
+ "rewards/margins": 0.19198258221149445,
72
+ "rewards/rejected": -21.692806243896484,
73
+ "step": 40
74
+ },
75
+ {
76
+ "epoch": 0.11557353366079168,
77
+ "grad_norm": 47.18745803833008,
78
+ "learning_rate": 9.845360824742267e-07,
79
+ "logits/chosen": -0.8356858491897583,
80
+ "logits/rejected": -0.8102104067802429,
81
+ "logps/chosen": -196.44061279296875,
82
+ "logps/rejected": -195.4991455078125,
83
+ "loss": 2.1857,
84
+ "nll_loss": 0.5283800959587097,
85
+ "rewards/accuracies": 0.5062500238418579,
86
+ "rewards/chosen": -19.644062042236328,
87
+ "rewards/margins": -0.09414808452129364,
88
+ "rewards/rejected": -19.549915313720703,
89
+ "step": 50
90
+ },
91
+ {
92
+ "epoch": 0.13868824039295002,
93
+ "grad_norm": 55.287715911865234,
94
+ "learning_rate": 9.587628865979382e-07,
95
+ "logits/chosen": -0.6823571920394897,
96
+ "logits/rejected": -0.68729567527771,
97
+ "logps/chosen": -164.70936584472656,
98
+ "logps/rejected": -164.9478302001953,
99
+ "loss": 2.0037,
100
+ "nll_loss": 0.4647987484931946,
101
+ "rewards/accuracies": 0.534375011920929,
102
+ "rewards/chosen": -16.470935821533203,
103
+ "rewards/margins": 0.02384711429476738,
104
+ "rewards/rejected": -16.494781494140625,
105
+ "step": 60
106
+ },
107
+ {
108
+ "epoch": 0.16180294712510834,
109
+ "grad_norm": 51.4761962890625,
110
+ "learning_rate": 9.329896907216495e-07,
111
+ "logits/chosen": -0.5190773010253906,
112
+ "logits/rejected": -0.49602770805358887,
113
+ "logps/chosen": -155.39878845214844,
114
+ "logps/rejected": -157.3554229736328,
115
+ "loss": 1.9943,
116
+ "nll_loss": 0.4453979432582855,
117
+ "rewards/accuracies": 0.518750011920929,
118
+ "rewards/chosen": -15.539876937866211,
119
+ "rewards/margins": 0.195662721991539,
120
+ "rewards/rejected": -15.735540390014648,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.1849176538572667,
125
+ "grad_norm": 46.25474166870117,
126
+ "learning_rate": 9.072164948453608e-07,
127
+ "logits/chosen": -0.4949778914451599,
128
+ "logits/rejected": -0.47060757875442505,
129
+ "logps/chosen": -158.2879180908203,
130
+ "logps/rejected": -161.47145080566406,
131
+ "loss": 1.8594,
132
+ "nll_loss": 0.4298928380012512,
133
+ "rewards/accuracies": 0.5093749761581421,
134
+ "rewards/chosen": -15.828791618347168,
135
+ "rewards/margins": 0.3183526396751404,
136
+ "rewards/rejected": -16.147144317626953,
137
+ "step": 80
138
+ },
139
+ {
140
+ "epoch": 0.208032360589425,
141
+ "grad_norm": 49.99284744262695,
142
+ "learning_rate": 8.814432989690721e-07,
143
+ "logits/chosen": -0.37455958127975464,
144
+ "logits/rejected": -0.3673579692840576,
145
+ "logps/chosen": -153.99154663085938,
146
+ "logps/rejected": -161.9925079345703,
147
+ "loss": 1.7748,
148
+ "nll_loss": 0.42108353972435,
149
+ "rewards/accuracies": 0.59375,
150
+ "rewards/chosen": -15.39915657043457,
151
+ "rewards/margins": 0.8000966310501099,
152
+ "rewards/rejected": -16.19925308227539,
153
+ "step": 90
154
+ },
155
+ {
156
+ "epoch": 0.23114706732158335,
157
+ "grad_norm": 47.385650634765625,
158
+ "learning_rate": 8.556701030927834e-07,
159
+ "logits/chosen": -0.3636273443698883,
160
+ "logits/rejected": -0.3537142872810364,
161
+ "logps/chosen": -153.48483276367188,
162
+ "logps/rejected": -156.58370971679688,
163
+ "loss": 1.9304,
164
+ "nll_loss": 0.4187200665473938,
165
+ "rewards/accuracies": 0.5406249761581421,
166
+ "rewards/chosen": -15.348485946655273,
167
+ "rewards/margins": 0.3098832070827484,
168
+ "rewards/rejected": -15.658369064331055,
169
+ "step": 100
170
+ },
171
+ {
172
+ "epoch": 0.23114706732158335,
173
+ "eval_logits/chosen": -0.3853626549243927,
174
+ "eval_logits/rejected": -0.36488524079322815,
175
+ "eval_logps/chosen": -149.94454956054688,
176
+ "eval_logps/rejected": -153.576171875,
177
+ "eval_loss": 1.7872967720031738,
178
+ "eval_nll_loss": 0.4085230827331543,
179
+ "eval_rewards/accuracies": 0.5804347991943359,
180
+ "eval_rewards/chosen": -14.994454383850098,
181
+ "eval_rewards/margins": 0.36316320300102234,
182
+ "eval_rewards/rejected": -15.357619285583496,
183
+ "eval_runtime": 73.8265,
184
+ "eval_samples_per_second": 24.734,
185
+ "eval_steps_per_second": 1.558,
186
+ "step": 100
187
+ },
188
+ {
189
+ "epoch": 0.2542617740537417,
190
+ "grad_norm": 47.490840911865234,
191
+ "learning_rate": 8.298969072164948e-07,
192
+ "logits/chosen": -0.3666607141494751,
193
+ "logits/rejected": -0.3409901261329651,
194
+ "logps/chosen": -147.7798309326172,
195
+ "logps/rejected": -152.14883422851562,
196
+ "loss": 1.7164,
197
+ "nll_loss": 0.40307506918907166,
198
+ "rewards/accuracies": 0.5687500238418579,
199
+ "rewards/chosen": -14.777984619140625,
200
+ "rewards/margins": 0.4368988573551178,
201
+ "rewards/rejected": -15.214881896972656,
202
+ "step": 110
203
+ },
204
+ {
205
+ "epoch": 0.27737648078590005,
206
+ "grad_norm": 45.41544723510742,
207
+ "learning_rate": 8.041237113402062e-07,
208
+ "logits/chosen": -0.36932340264320374,
209
+ "logits/rejected": -0.3461097776889801,
210
+ "logps/chosen": -155.2689208984375,
211
+ "logps/rejected": -156.53543090820312,
212
+ "loss": 1.7549,
213
+ "nll_loss": 0.42143669724464417,
214
+ "rewards/accuracies": 0.534375011920929,
215
+ "rewards/chosen": -15.526891708374023,
216
+ "rewards/margins": 0.12664994597434998,
217
+ "rewards/rejected": -15.653543472290039,
218
+ "step": 120
219
+ },
220
+ {
221
+ "epoch": 0.30049118751805837,
222
+ "grad_norm": 55.190162658691406,
223
+ "learning_rate": 7.783505154639175e-07,
224
+ "logits/chosen": -0.36812376976013184,
225
+ "logits/rejected": -0.3466120660305023,
226
+ "logps/chosen": -153.25393676757812,
227
+ "logps/rejected": -161.2375030517578,
228
+ "loss": 1.8234,
229
+ "nll_loss": 0.4233035147190094,
230
+ "rewards/accuracies": 0.59375,
231
+ "rewards/chosen": -15.325393676757812,
232
+ "rewards/margins": 0.7983576655387878,
233
+ "rewards/rejected": -16.123750686645508,
234
+ "step": 130
235
+ },
236
+ {
237
+ "epoch": 0.3236058942502167,
238
+ "grad_norm": 58.641231536865234,
239
+ "learning_rate": 7.525773195876288e-07,
240
+ "logits/chosen": -0.42011794447898865,
241
+ "logits/rejected": -0.41268259286880493,
242
+ "logps/chosen": -144.4340362548828,
243
+ "logps/rejected": -149.6342010498047,
244
+ "loss": 1.7976,
245
+ "nll_loss": 0.4138007164001465,
246
+ "rewards/accuracies": 0.5874999761581421,
247
+ "rewards/chosen": -14.443403244018555,
248
+ "rewards/margins": 0.520018458366394,
249
+ "rewards/rejected": -14.963422775268555,
250
+ "step": 140
251
+ },
252
+ {
253
+ "epoch": 0.34672060098237506,
254
+ "grad_norm": 59.50709533691406,
255
+ "learning_rate": 7.268041237113402e-07,
256
+ "logits/chosen": -0.45735687017440796,
257
+ "logits/rejected": -0.44332581758499146,
258
+ "logps/chosen": -141.5747528076172,
259
+ "logps/rejected": -147.01950073242188,
260
+ "loss": 1.8755,
261
+ "nll_loss": 0.41054767370224,
262
+ "rewards/accuracies": 0.578125,
263
+ "rewards/chosen": -14.157475471496582,
264
+ "rewards/margins": 0.5444743037223816,
265
+ "rewards/rejected": -14.701950073242188,
266
+ "step": 150
267
+ },
268
+ {
269
+ "epoch": 0.3698353077145334,
270
+ "grad_norm": 40.405235290527344,
271
+ "learning_rate": 7.010309278350515e-07,
272
+ "logits/chosen": -0.4439857602119446,
273
+ "logits/rejected": -0.41624826192855835,
274
+ "logps/chosen": -156.5723876953125,
275
+ "logps/rejected": -160.45640563964844,
276
+ "loss": 1.7024,
277
+ "nll_loss": 0.4132766127586365,
278
+ "rewards/accuracies": 0.5687500238418579,
279
+ "rewards/chosen": -15.65723991394043,
280
+ "rewards/margins": 0.38840025663375854,
281
+ "rewards/rejected": -16.045639038085938,
282
+ "step": 160
283
+ },
284
+ {
285
+ "epoch": 0.3929500144466917,
286
+ "grad_norm": 51.05006790161133,
287
+ "learning_rate": 6.752577319587629e-07,
288
+ "logits/chosen": -0.39237576723098755,
289
+ "logits/rejected": -0.3862777650356293,
290
+ "logps/chosen": -152.97549438476562,
291
+ "logps/rejected": -160.07443237304688,
292
+ "loss": 1.599,
293
+ "nll_loss": 0.41846928000450134,
294
+ "rewards/accuracies": 0.5562499761581421,
295
+ "rewards/chosen": -15.2975492477417,
296
+ "rewards/margins": 0.7098936438560486,
297
+ "rewards/rejected": -16.007442474365234,
298
+ "step": 170
299
+ },
300
+ {
301
+ "epoch": 0.41606472117885,
302
+ "grad_norm": 48.777130126953125,
303
+ "learning_rate": 6.494845360824742e-07,
304
+ "logits/chosen": -0.4046599864959717,
305
+ "logits/rejected": -0.4020842909812927,
306
+ "logps/chosen": -148.8914794921875,
307
+ "logps/rejected": -155.31307983398438,
308
+ "loss": 1.6521,
309
+ "nll_loss": 0.4291691780090332,
310
+ "rewards/accuracies": 0.565625011920929,
311
+ "rewards/chosen": -14.889147758483887,
312
+ "rewards/margins": 0.6421611309051514,
313
+ "rewards/rejected": -15.531309127807617,
314
+ "step": 180
315
+ },
316
+ {
317
+ "epoch": 0.4391794279110084,
318
+ "grad_norm": 51.11661911010742,
319
+ "learning_rate": 6.237113402061855e-07,
320
+ "logits/chosen": -0.42425212264060974,
321
+ "logits/rejected": -0.4136783480644226,
322
+ "logps/chosen": -156.27357482910156,
323
+ "logps/rejected": -162.0409393310547,
324
+ "loss": 1.7189,
325
+ "nll_loss": 0.42820248007774353,
326
+ "rewards/accuracies": 0.5843750238418579,
327
+ "rewards/chosen": -15.627357482910156,
328
+ "rewards/margins": 0.5767360329627991,
329
+ "rewards/rejected": -16.2040958404541,
330
+ "step": 190
331
+ },
332
+ {
333
+ "epoch": 0.4622941346431667,
334
+ "grad_norm": 45.06605529785156,
335
+ "learning_rate": 5.979381443298969e-07,
336
+ "logits/chosen": -0.3681766986846924,
337
+ "logits/rejected": -0.36281704902648926,
338
+ "logps/chosen": -157.00155639648438,
339
+ "logps/rejected": -163.3647003173828,
340
+ "loss": 1.6908,
341
+ "nll_loss": 0.4334492087364197,
342
+ "rewards/accuracies": 0.6187499761581421,
343
+ "rewards/chosen": -15.700152397155762,
344
+ "rewards/margins": 0.6363152265548706,
345
+ "rewards/rejected": -16.336469650268555,
346
+ "step": 200
347
+ },
348
+ {
349
+ "epoch": 0.4622941346431667,
350
+ "eval_logits/chosen": -0.4014091491699219,
351
+ "eval_logits/rejected": -0.3777381181716919,
352
+ "eval_logps/chosen": -156.43685913085938,
353
+ "eval_logps/rejected": -162.43849182128906,
354
+ "eval_loss": 1.6701573133468628,
355
+ "eval_nll_loss": 0.42524340748786926,
356
+ "eval_rewards/accuracies": 0.5978260636329651,
357
+ "eval_rewards/chosen": -15.643685340881348,
358
+ "eval_rewards/margins": 0.6001652479171753,
359
+ "eval_rewards/rejected": -16.243852615356445,
360
+ "eval_runtime": 73.8669,
361
+ "eval_samples_per_second": 24.72,
362
+ "eval_steps_per_second": 1.557,
363
+ "step": 200
364
+ },
365
+ {
366
+ "epoch": 0.48540884137532503,
367
+ "grad_norm": 52.39714813232422,
368
+ "learning_rate": 5.721649484536082e-07,
369
+ "logits/chosen": -0.4187684953212738,
370
+ "logits/rejected": -0.39357370138168335,
371
+ "logps/chosen": -151.34727478027344,
372
+ "logps/rejected": -153.41477966308594,
373
+ "loss": 1.6691,
374
+ "nll_loss": 0.42135825753211975,
375
+ "rewards/accuracies": 0.546875,
376
+ "rewards/chosen": -15.134727478027344,
377
+ "rewards/margins": 0.2067503184080124,
378
+ "rewards/rejected": -15.34147834777832,
379
+ "step": 210
380
+ },
381
+ {
382
+ "epoch": 0.5085235481074833,
383
+ "grad_norm": 48.871177673339844,
384
+ "learning_rate": 5.463917525773195e-07,
385
+ "logits/chosen": -0.46896496415138245,
386
+ "logits/rejected": -0.4411422312259674,
387
+ "logps/chosen": -160.63766479492188,
388
+ "logps/rejected": -166.47012329101562,
389
+ "loss": 1.7268,
390
+ "nll_loss": 0.43222665786743164,
391
+ "rewards/accuracies": 0.6156250238418579,
392
+ "rewards/chosen": -16.063764572143555,
393
+ "rewards/margins": 0.5832474231719971,
394
+ "rewards/rejected": -16.64701271057129,
395
+ "step": 220
396
+ },
397
+ {
398
+ "epoch": 0.5316382548396418,
399
+ "grad_norm": 46.8618278503418,
400
+ "learning_rate": 5.20618556701031e-07,
401
+ "logits/chosen": -0.4806763529777527,
402
+ "logits/rejected": -0.4655001163482666,
403
+ "logps/chosen": -164.1845245361328,
404
+ "logps/rejected": -170.3590087890625,
405
+ "loss": 1.753,
406
+ "nll_loss": 0.4462898373603821,
407
+ "rewards/accuracies": 0.5843750238418579,
408
+ "rewards/chosen": -16.4184513092041,
409
+ "rewards/margins": 0.6174517869949341,
410
+ "rewards/rejected": -17.03590202331543,
411
+ "step": 230
412
+ },
413
+ {
414
+ "epoch": 0.5547529615718001,
415
+ "grad_norm": 48.733802795410156,
416
+ "learning_rate": 4.948453608247422e-07,
417
+ "logits/chosen": -0.42387205362319946,
418
+ "logits/rejected": -0.4110351502895355,
419
+ "logps/chosen": -159.4880828857422,
420
+ "logps/rejected": -164.54312133789062,
421
+ "loss": 1.5882,
422
+ "nll_loss": 0.4322621822357178,
423
+ "rewards/accuracies": 0.5687500238418579,
424
+ "rewards/chosen": -15.948808670043945,
425
+ "rewards/margins": 0.5055034756660461,
426
+ "rewards/rejected": -16.45431137084961,
427
+ "step": 240
428
+ },
429
+ {
430
+ "epoch": 0.5778676683039584,
431
+ "grad_norm": 53.66923141479492,
432
+ "learning_rate": 4.6907216494845357e-07,
433
+ "logits/chosen": -0.45519933104515076,
434
+ "logits/rejected": -0.44147634506225586,
435
+ "logps/chosen": -156.13479614257812,
436
+ "logps/rejected": -159.08212280273438,
437
+ "loss": 1.7404,
438
+ "nll_loss": 0.4152873158454895,
439
+ "rewards/accuracies": 0.5406249761581421,
440
+ "rewards/chosen": -15.613479614257812,
441
+ "rewards/margins": 0.29473432898521423,
442
+ "rewards/rejected": -15.908210754394531,
443
+ "step": 250
444
+ },
445
+ {
446
+ "epoch": 0.6009823750361167,
447
+ "grad_norm": 50.42735290527344,
448
+ "learning_rate": 4.432989690721649e-07,
449
+ "logits/chosen": -0.5512745976448059,
450
+ "logits/rejected": -0.5374751091003418,
451
+ "logps/chosen": -152.5029296875,
452
+ "logps/rejected": -160.7849884033203,
453
+ "loss": 1.5895,
454
+ "nll_loss": 0.4251280725002289,
455
+ "rewards/accuracies": 0.6312500238418579,
456
+ "rewards/chosen": -15.250292778015137,
457
+ "rewards/margins": 0.8282074928283691,
458
+ "rewards/rejected": -16.07849884033203,
459
+ "step": 260
460
+ },
461
+ {
462
+ "epoch": 0.624097081768275,
463
+ "grad_norm": 42.25716781616211,
464
+ "learning_rate": 4.175257731958763e-07,
465
+ "logits/chosen": -0.5728802680969238,
466
+ "logits/rejected": -0.5652969479560852,
467
+ "logps/chosen": -153.87515258789062,
468
+ "logps/rejected": -160.92990112304688,
469
+ "loss": 1.622,
470
+ "nll_loss": 0.42097169160842896,
471
+ "rewards/accuracies": 0.6156250238418579,
472
+ "rewards/chosen": -15.3875150680542,
473
+ "rewards/margins": 0.7054744362831116,
474
+ "rewards/rejected": -16.09299087524414,
475
+ "step": 270
476
+ },
477
+ {
478
+ "epoch": 0.6472117885004334,
479
+ "grad_norm": 52.24223327636719,
480
+ "learning_rate": 3.917525773195876e-07,
481
+ "logits/chosen": -0.5287462472915649,
482
+ "logits/rejected": -0.5079108476638794,
483
+ "logps/chosen": -155.820068359375,
484
+ "logps/rejected": -165.27578735351562,
485
+ "loss": 1.6145,
486
+ "nll_loss": 0.42984142899513245,
487
+ "rewards/accuracies": 0.643750011920929,
488
+ "rewards/chosen": -15.582005500793457,
489
+ "rewards/margins": 0.9455726742744446,
490
+ "rewards/rejected": -16.527578353881836,
491
+ "step": 280
492
+ },
493
+ {
494
+ "epoch": 0.6703264952325917,
495
+ "grad_norm": 54.0615119934082,
496
+ "learning_rate": 3.659793814432989e-07,
497
+ "logits/chosen": -0.5829291343688965,
498
+ "logits/rejected": -0.5588080883026123,
499
+ "logps/chosen": -160.86459350585938,
500
+ "logps/rejected": -166.34828186035156,
501
+ "loss": 1.5808,
502
+ "nll_loss": 0.4233360290527344,
503
+ "rewards/accuracies": 0.609375,
504
+ "rewards/chosen": -16.08646011352539,
505
+ "rewards/margins": 0.5483680963516235,
506
+ "rewards/rejected": -16.63482666015625,
507
+ "step": 290
508
+ },
509
+ {
510
+ "epoch": 0.6934412019647501,
511
+ "grad_norm": 43.99635696411133,
512
+ "learning_rate": 3.402061855670103e-07,
513
+ "logits/chosen": -0.5929441452026367,
514
+ "logits/rejected": -0.585766613483429,
515
+ "logps/chosen": -164.1534423828125,
516
+ "logps/rejected": -173.26889038085938,
517
+ "loss": 1.6317,
518
+ "nll_loss": 0.43874359130859375,
519
+ "rewards/accuracies": 0.609375,
520
+ "rewards/chosen": -16.415346145629883,
521
+ "rewards/margins": 0.911544144153595,
522
+ "rewards/rejected": -17.326889038085938,
523
+ "step": 300
524
+ },
525
+ {
526
+ "epoch": 0.6934412019647501,
527
+ "eval_logits/chosen": -0.49481379985809326,
528
+ "eval_logits/rejected": -0.4753292500972748,
529
+ "eval_logps/chosen": -154.68177795410156,
530
+ "eval_logps/rejected": -161.5185089111328,
531
+ "eval_loss": 1.6161738634109497,
532
+ "eval_nll_loss": 0.42018982768058777,
533
+ "eval_rewards/accuracies": 0.615217387676239,
534
+ "eval_rewards/chosen": -15.468178749084473,
535
+ "eval_rewards/margins": 0.6836734414100647,
536
+ "eval_rewards/rejected": -16.151851654052734,
537
+ "eval_runtime": 74.0087,
538
+ "eval_samples_per_second": 24.673,
539
+ "eval_steps_per_second": 1.554,
540
+ "step": 300
541
+ },
542
+ {
543
+ "epoch": 0.7165559086969084,
544
+ "grad_norm": 47.26166534423828,
545
+ "learning_rate": 3.1443298969072163e-07,
546
+ "logits/chosen": -0.5505023002624512,
547
+ "logits/rejected": -0.5488861799240112,
548
+ "logps/chosen": -164.3349609375,
549
+ "logps/rejected": -167.97909545898438,
550
+ "loss": 1.6871,
551
+ "nll_loss": 0.42464059591293335,
552
+ "rewards/accuracies": 0.550000011920929,
553
+ "rewards/chosen": -16.433496475219727,
554
+ "rewards/margins": 0.36441320180892944,
555
+ "rewards/rejected": -16.797908782958984,
556
+ "step": 310
557
+ },
558
+ {
559
+ "epoch": 0.7396706154290668,
560
+ "grad_norm": 44.56684875488281,
561
+ "learning_rate": 2.8865979381443296e-07,
562
+ "logits/chosen": -0.5739372372627258,
563
+ "logits/rejected": -0.5621416568756104,
564
+ "logps/chosen": -154.22021484375,
565
+ "logps/rejected": -162.76797485351562,
566
+ "loss": 1.6709,
567
+ "nll_loss": 0.42187362909317017,
568
+ "rewards/accuracies": 0.6343749761581421,
569
+ "rewards/chosen": -15.422021865844727,
570
+ "rewards/margins": 0.8547781109809875,
571
+ "rewards/rejected": -16.276798248291016,
572
+ "step": 320
573
+ },
574
+ {
575
+ "epoch": 0.7627853221612251,
576
+ "grad_norm": 48.82611083984375,
577
+ "learning_rate": 2.6288659793814435e-07,
578
+ "logits/chosen": -0.584633469581604,
579
+ "logits/rejected": -0.5864993333816528,
580
+ "logps/chosen": -154.85739135742188,
581
+ "logps/rejected": -161.60858154296875,
582
+ "loss": 1.6549,
583
+ "nll_loss": 0.43096083402633667,
584
+ "rewards/accuracies": 0.6156250238418579,
585
+ "rewards/chosen": -15.485738754272461,
586
+ "rewards/margins": 0.6751174330711365,
587
+ "rewards/rejected": -16.160858154296875,
588
+ "step": 330
589
+ },
590
+ {
591
+ "epoch": 0.7859000288933834,
592
+ "grad_norm": 46.865325927734375,
593
+ "learning_rate": 2.3711340206185566e-07,
594
+ "logits/chosen": -0.5921510457992554,
595
+ "logits/rejected": -0.5866528153419495,
596
+ "logps/chosen": -157.51187133789062,
597
+ "logps/rejected": -162.7019500732422,
598
+ "loss": 1.5515,
599
+ "nll_loss": 0.4305364489555359,
600
+ "rewards/accuracies": 0.5718749761581421,
601
+ "rewards/chosen": -15.751187324523926,
602
+ "rewards/margins": 0.5190097093582153,
603
+ "rewards/rejected": -16.27019500732422,
604
+ "step": 340
605
+ },
606
+ {
607
+ "epoch": 0.8090147356255417,
608
+ "grad_norm": 45.32524871826172,
609
+ "learning_rate": 2.11340206185567e-07,
610
+ "logits/chosen": -0.574793815612793,
611
+ "logits/rejected": -0.575833261013031,
612
+ "logps/chosen": -158.30972290039062,
613
+ "logps/rejected": -167.1160888671875,
614
+ "loss": 1.6559,
615
+ "nll_loss": 0.4133908152580261,
616
+ "rewards/accuracies": 0.6031249761581421,
617
+ "rewards/chosen": -15.830973625183105,
618
+ "rewards/margins": 0.8806363940238953,
619
+ "rewards/rejected": -16.71160888671875,
620
+ "step": 350
621
+ },
622
+ {
623
+ "epoch": 0.8321294423577,
624
+ "grad_norm": 53.514156341552734,
625
+ "learning_rate": 1.8556701030927835e-07,
626
+ "logits/chosen": -0.5572192668914795,
627
+ "logits/rejected": -0.5455694794654846,
628
+ "logps/chosen": -161.35818481445312,
629
+ "logps/rejected": -166.35061645507812,
630
+ "loss": 1.7006,
631
+ "nll_loss": 0.4319698214530945,
632
+ "rewards/accuracies": 0.606249988079071,
633
+ "rewards/chosen": -16.135818481445312,
634
+ "rewards/margins": 0.4992440342903137,
635
+ "rewards/rejected": -16.635061264038086,
636
+ "step": 360
637
+ },
638
+ {
639
+ "epoch": 0.8552441490898585,
640
+ "grad_norm": 47.664703369140625,
641
+ "learning_rate": 1.5979381443298966e-07,
642
+ "logits/chosen": -0.5517255067825317,
643
+ "logits/rejected": -0.5260570049285889,
644
+ "logps/chosen": -155.55886840820312,
645
+ "logps/rejected": -165.2543487548828,
646
+ "loss": 1.5564,
647
+ "nll_loss": 0.42134684324264526,
648
+ "rewards/accuracies": 0.659375011920929,
649
+ "rewards/chosen": -15.555887222290039,
650
+ "rewards/margins": 0.9695472717285156,
651
+ "rewards/rejected": -16.525434494018555,
652
+ "step": 370
653
+ },
654
+ {
655
+ "epoch": 0.8783588558220168,
656
+ "grad_norm": 48.99809646606445,
657
+ "learning_rate": 1.3402061855670102e-07,
658
+ "logits/chosen": -0.5434561967849731,
659
+ "logits/rejected": -0.5287705063819885,
660
+ "logps/chosen": -154.3162841796875,
661
+ "logps/rejected": -161.20472717285156,
662
+ "loss": 1.5817,
663
+ "nll_loss": 0.431587278842926,
664
+ "rewards/accuracies": 0.6000000238418579,
665
+ "rewards/chosen": -15.431628227233887,
666
+ "rewards/margins": 0.6888439655303955,
667
+ "rewards/rejected": -16.120471954345703,
668
+ "step": 380
669
+ },
670
+ {
671
+ "epoch": 0.9014735625541751,
672
+ "grad_norm": 52.46923065185547,
673
+ "learning_rate": 1.0824742268041237e-07,
674
+ "logits/chosen": -0.5911422967910767,
675
+ "logits/rejected": -0.5752480626106262,
676
+ "logps/chosen": -158.2704620361328,
677
+ "logps/rejected": -168.7931671142578,
678
+ "loss": 1.6055,
679
+ "nll_loss": 0.42453208565711975,
680
+ "rewards/accuracies": 0.625,
681
+ "rewards/chosen": -15.827044486999512,
682
+ "rewards/margins": 1.0522701740264893,
683
+ "rewards/rejected": -16.879314422607422,
684
+ "step": 390
685
+ },
686
+ {
687
+ "epoch": 0.9245882692863334,
688
+ "grad_norm": 50.29128646850586,
689
+ "learning_rate": 8.24742268041237e-08,
690
+ "logits/chosen": -0.5491029024124146,
691
+ "logits/rejected": -0.5335959196090698,
692
+ "logps/chosen": -162.01458740234375,
693
+ "logps/rejected": -167.11669921875,
694
+ "loss": 1.62,
695
+ "nll_loss": 0.44402360916137695,
696
+ "rewards/accuracies": 0.5874999761581421,
697
+ "rewards/chosen": -16.201457977294922,
698
+ "rewards/margins": 0.5102119445800781,
699
+ "rewards/rejected": -16.711669921875,
700
+ "step": 400
701
+ },
702
+ {
703
+ "epoch": 0.9245882692863334,
704
+ "eval_logits/chosen": -0.514378547668457,
705
+ "eval_logits/rejected": -0.49102360010147095,
706
+ "eval_logps/chosen": -155.9636688232422,
707
+ "eval_logps/rejected": -163.15530395507812,
708
+ "eval_loss": 1.5946580171585083,
709
+ "eval_nll_loss": 0.4261849522590637,
710
+ "eval_rewards/accuracies": 0.626086950302124,
711
+ "eval_rewards/chosen": -15.596366882324219,
712
+ "eval_rewards/margins": 0.7191624045372009,
713
+ "eval_rewards/rejected": -16.315528869628906,
714
+ "eval_runtime": 73.9588,
715
+ "eval_samples_per_second": 24.689,
716
+ "eval_steps_per_second": 1.555,
717
+ "step": 400
718
+ },
719
+ {
720
+ "epoch": 0.9477029760184917,
721
+ "grad_norm": 49.76476287841797,
722
+ "learning_rate": 5.670103092783505e-08,
723
+ "logits/chosen": -0.47304850816726685,
724
+ "logits/rejected": -0.46452435851097107,
725
+ "logps/chosen": -148.42233276367188,
726
+ "logps/rejected": -157.0984649658203,
727
+ "loss": 1.5582,
728
+ "nll_loss": 0.42344313859939575,
729
+ "rewards/accuracies": 0.6031249761581421,
730
+ "rewards/chosen": -14.842233657836914,
731
+ "rewards/margins": 0.8676150441169739,
732
+ "rewards/rejected": -15.709848403930664,
733
+ "step": 410
734
+ },
735
+ {
736
+ "epoch": 0.9708176827506501,
737
+ "grad_norm": 45.05620574951172,
738
+ "learning_rate": 3.092783505154639e-08,
739
+ "logits/chosen": -0.5102072954177856,
740
+ "logits/rejected": -0.4865845739841461,
741
+ "logps/chosen": -150.34286499023438,
742
+ "logps/rejected": -156.001708984375,
743
+ "loss": 1.5674,
744
+ "nll_loss": 0.4283737242221832,
745
+ "rewards/accuracies": 0.590624988079071,
746
+ "rewards/chosen": -15.03428840637207,
747
+ "rewards/margins": 0.5658840537071228,
748
+ "rewards/rejected": -15.600171089172363,
749
+ "step": 420
750
+ },
751
+ {
752
+ "epoch": 0.9939323894828085,
753
+ "grad_norm": 46.765380859375,
754
+ "learning_rate": 5.154639175257731e-09,
755
+ "logits/chosen": -0.5253250002861023,
756
+ "logits/rejected": -0.534714937210083,
757
+ "logps/chosen": -157.79779052734375,
758
+ "logps/rejected": -164.88742065429688,
759
+ "loss": 1.645,
760
+ "nll_loss": 0.42819744348526,
761
+ "rewards/accuracies": 0.6031249761581421,
762
+ "rewards/chosen": -15.779779434204102,
763
+ "rewards/margins": 0.7089639902114868,
764
+ "rewards/rejected": -16.48874282836914,
765
+ "step": 430
766
+ },
767
+ {
768
+ "epoch": 0.9985553308292401,
769
+ "step": 432,
770
+ "total_flos": 0.0,
771
+ "train_loss": 1.7731637126869626,
772
+ "train_runtime": 10231.9294,
773
+ "train_samples_per_second": 5.412,
774
+ "train_steps_per_second": 0.042
775
+ }
776
+ ],
777
+ "logging_steps": 10,
778
+ "max_steps": 432,
779
+ "num_input_tokens_seen": 0,
780
+ "num_train_epochs": 1,
781
+ "save_steps": 100,
782
+ "stateful_callbacks": {
783
+ "TrainerControl": {
784
+ "args": {
785
+ "should_epoch_stop": false,
786
+ "should_evaluate": false,
787
+ "should_log": false,
788
+ "should_save": true,
789
+ "should_training_stop": true
790
+ },
791
+ "attributes": {}
792
+ }
793
+ },
794
+ "total_flos": 0.0,
795
+ "train_batch_size": 4,
796
+ "trial_name": null,
797
+ "trial_params": null
798
+ }
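
The `log_history` above is plain JSON, so the training and evaluation curves can be pulled out in a few lines; a sketch that uses only keys appearing in the file:

```python
# Extract loss curves from trainer_state.json (keys as they appear above).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

train_entries = [e for e in state["log_history"] if "loss" in e]      # per-10-step training logs
eval_entries = [e for e in state["log_history"] if "eval_loss" in e]  # every 100 steps

for e in eval_entries:
    print(f'step {e["step"]:>3}: eval_loss={e["eval_loss"]:.4f}, '
          f'accuracy={e["eval_rewards/accuracies"]:.3f}, margin={e["eval_rewards/margins"]:.3f}')
```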