jikaixuan committed
Commit: 16bc4cf
Parent: cafd513

Model save

README.md ADDED
@@ -0,0 +1,80 @@
+ ---
+ license: apache-2.0
+ library_name: peft
+ tags:
+ - trl
+ - dpo
+ - generated_from_trainer
+ base_model: mistralai/Mistral-7B-v0.1
+ model-index:
+ - name: zephyr-7b
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # zephyr-7b
+
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.6182
+ - Rewards/chosen: -0.4457
+ - Rewards/rejected: -0.8122
+ - Rewards/accuracies: 0.3672
+ - Rewards/margins: 0.3666
+ - Logps/rejected: -158.4149
+ - Logps/chosen: -108.4784
+ - Logits/rejected: 0.9580
+ - Logits/chosen: 0.9035
+ - Use Label: 0.0
+ - Pred Label: 0.0
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-06
+ - train_batch_size: 4
+ - eval_batch_size: 8
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 128
+ - total_eval_batch_size: 64
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 1
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
+ | 0.6553 | 0.21 | 100 | 0.6557 | -0.1267 | -0.2685 | 0.3633 | 0.1419 | -104.0477 | -76.5787 | -2.0726 | -2.0833 | 0.0 | 0.0 |
+ | 0.6446 | 0.42 | 200 | 0.6343 | -0.2873 | -0.5376 | 0.3828 | 0.2503 | -130.9503 | -92.6377 | -0.6864 | -0.7124 | 0.0 | 0.0 |
+ | 0.6273 | 0.63 | 300 | 0.6204 | -0.4623 | -0.7994 | 0.3672 | 0.3371 | -157.1332 | -110.1469 | 0.6726 | 0.6280 | 0.0 | 0.0 |
+ | 0.6165 | 0.84 | 400 | 0.6182 | -0.4457 | -0.8122 | 0.3672 | 0.3666 | -158.4149 | -108.4784 | 0.9580 | 0.9035 | 0.0 | 0.0 |
+
+
+ ### Framework versions
+
+ - PEFT 0.7.1
+ - Transformers 4.38.2
+ - Pytorch 2.1.1+cu121
+ - Datasets 2.14.6
+ - Tokenizers 0.15.2
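
The card above describes a PEFT (LoRA-style) adapter sitting on top of mistralai/Mistral-7B-v0.1, trained with TRL's DPO tooling. A minimal loading sketch follows; the repository id `jikaixuan/zephyr-7b`, the bf16 dtype, and `device_map="auto"` are assumptions made for illustration, not something recorded in this commit.

```python
# Minimal sketch: attach the DPO-trained PEFT adapter to the Mistral-7B base model.
# Assumption: the adapter is published at "jikaixuan/zephyr-7b" (hypothetical repo id).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "mistralai/Mistral-7B-v0.1"
adapter_id = "jikaixuan/zephyr-7b"  # assumed; replace with the actual adapter repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16, device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)  # loads adapter_model.safetensors on top

prompt = "Explain in one sentence what DPO fine-tuning changes about a language model."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```

If a single merged checkpoint is preferred, `model.merge_and_unload()` folds the LoRA weights back into the base model at the cost of the small-adapter footprint.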
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:04c5de933a493d1f1d280a810bf7e139f8e2af8c2196674c46dbf2d33d4538dd
+ oid sha256:d13d352181489b0c0d490b1f94e135d6ba5d33aebeb5eb98faf9c68d8f394b98
  size 671150064
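
The pointer above changes only its sha256 oid while the payload size stays at 671150064 bytes, which is what a re-save of same-shaped adapter tensors after retraining looks like. A quick way to check a locally downloaded `adapter_model.safetensors` against the new pointer (the local path is an assumption):

```python
# Sketch: verify a downloaded file against the oid/size recorded in a git-lfs pointer.
import hashlib

path = "adapter_model.safetensors"  # assumed local path of the downloaded file
expected_oid = "d13d352181489b0c0d490b1f94e135d6ba5d33aebeb5eb98faf9c68d8f394b98"
expected_size = 671150064

h = hashlib.sha256()
size = 0
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
        size += len(chunk)

print("size ok:", size == expected_size)
print("oid ok:", h.hexdigest() == expected_oid)
```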
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 1.0,
+     "train_loss": 0.6389844682481554,
+     "train_runtime": 9615.2592,
+     "train_samples": 61135,
+     "train_samples_per_second": 6.358,
+     "train_steps_per_second": 0.05
+ }
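
The derived throughput numbers in all_results.json are consistent with one another; a few lines of arithmetic confirm it (the 477-step count is taken from trainer_state.json further down, and the total batch size from the README's hyperparameters):

```python
# Sketch: recompute the derived throughput metrics reported in all_results.json.
train_samples = 61135
train_runtime = 9615.2592          # seconds
total_train_batch_size = 128       # 4 per device * 8 GPUs * 4 grad-accum steps
steps = 477                        # global_step from trainer_state.json

print(train_samples / train_runtime)     # ~6.358  -> matches train_samples_per_second
print(steps / train_runtime)             # ~0.0496 -> reported (rounded) as 0.05
print(steps * total_train_batch_size)    # 61056, i.e. roughly one pass over 61135 samples
```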
runs/Mar19_21-02-18_uclaml04.cs.ucla.edu/events.out.tfevents.1710907561.uclaml04.cs.ucla.edu.3973422.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a4b9574ff73261c504912466cce39e49520b240d901176a5015650118a2a9f3b
- size 45084
+ oid sha256:cabb4ba084135335b7667de3d4fc284f7b992c10f172e6822a8adaca745838c7
+ size 47032
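
This tfevents file holds the TensorBoard scalars for the run. A small sketch for inspecting it offline, assuming the `runs/` directory from the repo has been downloaded with the same layout; since the exact tag names depend on what the trainer logged, they are enumerated rather than guessed:

```python
# Sketch: list and read scalars from the TensorBoard event file in this commit.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("runs/Mar19_21-02-18_uclaml04.cs.ucla.edu")  # assumed local copy
ea.Reload()

for tag in ea.Tags()["scalars"]:          # discover available tags instead of assuming them
    events = ea.Scalars(tag)
    print(tag, "last step:", events[-1].step, "last value:", events[-1].value)
```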
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 1.0,
+     "train_loss": 0.6389844682481554,
+     "train_runtime": 9615.2592,
+     "train_samples": 61135,
+     "train_samples_per_second": 6.358,
+     "train_steps_per_second": 0.05
+ }
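
For reference, TRL's DPO metrics report the implicit rewards beta * (log pi_theta(y|x) - log pi_ref(y|x)) for the chosen and rejected completions; rewards/margins is their difference and rewards/accuracies is the fraction of pairs where the chosen reward is higher. The hyperparameters listed in the README above (DPO on a PEFT adapter, lr 5e-6, cosine schedule with 10% warmup, effective batch 128, one epoch) map roughly onto a TRL setup like the sketch below. It is only an outline under assumptions: the dataset (guessed from the 61,135-sample count), the LoRA configuration, and beta are not recorded in this commit, the extra Use Label / Pred Label columns indicate a customized trainer rather than the stock one, and the DPOTrainer signature shown follows the TRL 0.7-era API.

```python
# Rough sketch of a TRL DPO + LoRA setup matching the README hyperparameters.
# Assumptions: dataset choice, LoRA config, and beta are NOT specified in this commit.
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from peft import LoraConfig
from trl import DPOTrainer

model_id = "mistralai/Mistral-7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)

# Assumed dataset: its train_prefs split has ~61k preference pairs, matching train_samples.
dataset = load_dataset("HuggingFaceH4/ultrafeedback_binarized", split="train_prefs")

peft_config = LoraConfig(r=16, lora_alpha=16, lora_dropout=0.05, task_type="CAUSAL_LM")  # assumed values

args = TrainingArguments(
    output_dir="zephyr-7b",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,   # 4 per device * 8 GPUs * 4 accum = effective batch 128
    learning_rate=5e-6,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    num_train_epochs=1,
    seed=42,
    bf16=True,
)

trainer = DPOTrainer(
    model=model,
    ref_model=None,        # with a PEFT adapter, TRL uses the frozen base model as the reference
    args=args,
    beta=0.1,              # assumption: beta is not recorded anywhere in this commit
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
)
trainer.train()
```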
trainer_state.json ADDED
@@ -0,0 +1,918 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9984301412872841,
5
+ "eval_steps": 100,
6
+ "global_step": 477,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "grad_norm": 0.400390625,
14
+ "learning_rate": 1.0416666666666667e-07,
15
+ "logits/chosen": -2.2547454833984375,
16
+ "logits/rejected": -2.401865005493164,
17
+ "logps/chosen": -53.759212493896484,
18
+ "logps/rejected": -48.83185958862305,
19
+ "loss": 0.6931,
20
+ "pred_label": 0.0,
21
+ "rewards/accuracies": 0.0,
22
+ "rewards/chosen": 0.0,
23
+ "rewards/margins": 0.0,
24
+ "rewards/rejected": 0.0,
25
+ "step": 1,
26
+ "use_label": 0.0
27
+ },
28
+ {
29
+ "epoch": 0.02,
30
+ "grad_norm": 0.4609375,
31
+ "learning_rate": 1.0416666666666667e-06,
32
+ "logits/chosen": -2.242556571960449,
33
+ "logits/rejected": -2.277317762374878,
34
+ "logps/chosen": -51.96327209472656,
35
+ "logps/rejected": -64.98894500732422,
36
+ "loss": 0.6929,
37
+ "pred_label": 0.0,
38
+ "rewards/accuracies": 0.2361111044883728,
39
+ "rewards/chosen": 0.002160965697839856,
40
+ "rewards/margins": 0.0009470728691667318,
41
+ "rewards/rejected": 0.0012138929450884461,
42
+ "step": 10,
43
+ "use_label": 0.0
44
+ },
45
+ {
46
+ "epoch": 0.04,
47
+ "grad_norm": 0.396484375,
48
+ "learning_rate": 2.0833333333333334e-06,
49
+ "logits/chosen": -2.252474784851074,
50
+ "logits/rejected": -2.256141185760498,
51
+ "logps/chosen": -62.50165557861328,
52
+ "logps/rejected": -72.6328125,
53
+ "loss": 0.6919,
54
+ "pred_label": 0.0,
55
+ "rewards/accuracies": 0.28125,
56
+ "rewards/chosen": 0.01592240110039711,
57
+ "rewards/margins": 0.001004441175609827,
58
+ "rewards/rejected": 0.014917959459125996,
59
+ "step": 20,
60
+ "use_label": 0.0
61
+ },
62
+ {
63
+ "epoch": 0.06,
64
+ "grad_norm": 0.51171875,
65
+ "learning_rate": 3.125e-06,
66
+ "logits/chosen": -2.342515468597412,
67
+ "logits/rejected": -2.3552591800689697,
68
+ "logps/chosen": -79.15455627441406,
69
+ "logps/rejected": -98.8229751586914,
70
+ "loss": 0.6898,
71
+ "pred_label": 0.0,
72
+ "rewards/accuracies": 0.2874999940395355,
73
+ "rewards/chosen": 0.030873581767082214,
74
+ "rewards/margins": 0.002844910603016615,
75
+ "rewards/rejected": 0.02802867256104946,
76
+ "step": 30,
77
+ "use_label": 0.0
78
+ },
79
+ {
80
+ "epoch": 0.08,
81
+ "grad_norm": 0.51953125,
82
+ "learning_rate": 4.166666666666667e-06,
83
+ "logits/chosen": -2.323695421218872,
84
+ "logits/rejected": -2.3019304275512695,
85
+ "logps/chosen": -82.8508071899414,
86
+ "logps/rejected": -82.39540100097656,
87
+ "loss": 0.6866,
88
+ "pred_label": 0.0,
89
+ "rewards/accuracies": 0.2874999940395355,
90
+ "rewards/chosen": 0.033413294702768326,
91
+ "rewards/margins": 0.011912978254258633,
92
+ "rewards/rejected": 0.021500317379832268,
93
+ "step": 40,
94
+ "use_label": 0.0
95
+ },
96
+ {
97
+ "epoch": 0.1,
98
+ "grad_norm": 0.6640625,
99
+ "learning_rate": 4.999731868769027e-06,
100
+ "logits/chosen": -2.2408015727996826,
101
+ "logits/rejected": -2.2638282775878906,
102
+ "logps/chosen": -67.89698028564453,
103
+ "logps/rejected": -81.84117126464844,
104
+ "loss": 0.6805,
105
+ "pred_label": 0.0,
106
+ "rewards/accuracies": 0.32499998807907104,
107
+ "rewards/chosen": 0.009338948875665665,
108
+ "rewards/margins": 0.030354563146829605,
109
+ "rewards/rejected": -0.02101561427116394,
110
+ "step": 50,
111
+ "use_label": 0.0
112
+ },
113
+ {
114
+ "epoch": 0.13,
115
+ "grad_norm": 1.53125,
116
+ "learning_rate": 4.9903533134293035e-06,
117
+ "logits/chosen": -2.2194154262542725,
118
+ "logits/rejected": -2.1603574752807617,
119
+ "logps/chosen": -62.444313049316406,
120
+ "logps/rejected": -72.18606567382812,
121
+ "loss": 0.6753,
122
+ "pred_label": 0.0,
123
+ "rewards/accuracies": 0.30000001192092896,
124
+ "rewards/chosen": -0.027180707082152367,
125
+ "rewards/margins": 0.044989973306655884,
126
+ "rewards/rejected": -0.072170689702034,
127
+ "step": 60,
128
+ "use_label": 0.0
129
+ },
130
+ {
131
+ "epoch": 0.15,
132
+ "grad_norm": 1.84375,
133
+ "learning_rate": 4.967625656594782e-06,
134
+ "logits/chosen": -2.1111249923706055,
135
+ "logits/rejected": -2.109537124633789,
136
+ "logps/chosen": -62.041603088378906,
137
+ "logps/rejected": -75.64030456542969,
138
+ "loss": 0.666,
139
+ "pred_label": 0.0,
140
+ "rewards/accuracies": 0.25,
141
+ "rewards/chosen": -0.06330498307943344,
142
+ "rewards/margins": 0.03508424013853073,
143
+ "rewards/rejected": -0.09838922321796417,
144
+ "step": 70,
145
+ "use_label": 0.0
146
+ },
147
+ {
148
+ "epoch": 0.17,
149
+ "grad_norm": 1.03125,
150
+ "learning_rate": 4.93167072587771e-06,
151
+ "logits/chosen": -2.21980881690979,
152
+ "logits/rejected": -2.1616053581237793,
153
+ "logps/chosen": -60.844932556152344,
154
+ "logps/rejected": -74.95368957519531,
155
+ "loss": 0.66,
156
+ "pred_label": 0.0,
157
+ "rewards/accuracies": 0.26249998807907104,
158
+ "rewards/chosen": -0.12314031273126602,
159
+ "rewards/margins": 0.0946219339966774,
160
+ "rewards/rejected": -0.21776223182678223,
161
+ "step": 80,
162
+ "use_label": 0.0
163
+ },
164
+ {
165
+ "epoch": 0.19,
166
+ "grad_norm": 1.5390625,
167
+ "learning_rate": 4.882681251368549e-06,
168
+ "logits/chosen": -2.109405279159546,
169
+ "logits/rejected": -2.1181578636169434,
170
+ "logps/chosen": -77.24811553955078,
171
+ "logps/rejected": -95.32093811035156,
172
+ "loss": 0.6621,
173
+ "pred_label": 0.0,
174
+ "rewards/accuracies": 0.3062500059604645,
175
+ "rewards/chosen": -0.1886606067419052,
176
+ "rewards/margins": 0.07690713554620743,
177
+ "rewards/rejected": -0.26556771993637085,
178
+ "step": 90,
179
+ "use_label": 0.0
180
+ },
181
+ {
182
+ "epoch": 0.21,
183
+ "grad_norm": 1.1640625,
184
+ "learning_rate": 4.8209198325401815e-06,
185
+ "logits/chosen": -2.1972146034240723,
186
+ "logits/rejected": -2.169661283493042,
187
+ "logps/chosen": -92.16123962402344,
188
+ "logps/rejected": -84.31734466552734,
189
+ "loss": 0.6553,
190
+ "pred_label": 0.0,
191
+ "rewards/accuracies": 0.3499999940395355,
192
+ "rewards/chosen": -0.12131345272064209,
193
+ "rewards/margins": 0.08319222182035446,
194
+ "rewards/rejected": -0.20450565218925476,
195
+ "step": 100,
196
+ "use_label": 0.0
197
+ },
198
+ {
199
+ "epoch": 0.21,
200
+ "eval_logits/chosen": -2.0832693576812744,
201
+ "eval_logits/rejected": -2.0725808143615723,
202
+ "eval_logps/chosen": -76.57865905761719,
203
+ "eval_logps/rejected": -104.04773712158203,
204
+ "eval_loss": 0.6557236313819885,
205
+ "eval_pred_label": 0.0,
206
+ "eval_rewards/accuracies": 0.36328125,
207
+ "eval_rewards/chosen": -0.12666408717632294,
208
+ "eval_rewards/margins": 0.14188387989997864,
209
+ "eval_rewards/rejected": -0.26854798197746277,
210
+ "eval_runtime": 125.5075,
211
+ "eval_samples_per_second": 15.935,
212
+ "eval_steps_per_second": 0.255,
213
+ "eval_use_label": 0.0,
214
+ "step": 100
215
+ },
216
+ {
217
+ "epoch": 0.23,
218
+ "grad_norm": 1.1484375,
219
+ "learning_rate": 4.746717530629565e-06,
220
+ "logits/chosen": -2.125093460083008,
221
+ "logits/rejected": -2.108320713043213,
222
+ "logps/chosen": -86.47650146484375,
223
+ "logps/rejected": -108.77266693115234,
224
+ "loss": 0.6536,
225
+ "pred_label": 0.0,
226
+ "rewards/accuracies": 0.375,
227
+ "rewards/chosen": -0.1537572741508484,
228
+ "rewards/margins": 0.14806225895881653,
229
+ "rewards/rejected": -0.3018195331096649,
230
+ "step": 110,
231
+ "use_label": 0.0
232
+ },
233
+ {
234
+ "epoch": 0.25,
235
+ "grad_norm": 1.5390625,
236
+ "learning_rate": 4.660472094042121e-06,
237
+ "logits/chosen": -1.9497900009155273,
238
+ "logits/rejected": -1.8884683847427368,
239
+ "logps/chosen": -95.01170349121094,
240
+ "logps/rejected": -114.40583801269531,
241
+ "loss": 0.652,
242
+ "pred_label": 0.0,
243
+ "rewards/accuracies": 0.36250001192092896,
244
+ "rewards/chosen": -0.2506612241268158,
245
+ "rewards/margins": 0.16420678794384003,
246
+ "rewards/rejected": -0.414868026971817,
247
+ "step": 120,
248
+ "use_label": 0.0
249
+ },
250
+ {
251
+ "epoch": 0.27,
252
+ "grad_norm": 1.9296875,
253
+ "learning_rate": 4.5626458262912745e-06,
254
+ "logits/chosen": -1.7961517572402954,
255
+ "logits/rejected": -1.7706302404403687,
256
+ "logps/chosen": -90.99502563476562,
257
+ "logps/rejected": -112.71142578125,
258
+ "loss": 0.654,
259
+ "pred_label": 0.0,
260
+ "rewards/accuracies": 0.34375,
261
+ "rewards/chosen": -0.2521664500236511,
262
+ "rewards/margins": 0.1464831829071045,
263
+ "rewards/rejected": -0.3986496329307556,
264
+ "step": 130,
265
+ "use_label": 0.0
266
+ },
267
+ {
268
+ "epoch": 0.29,
269
+ "grad_norm": 1.9921875,
270
+ "learning_rate": 4.453763107901676e-06,
271
+ "logits/chosen": -1.7561969757080078,
272
+ "logits/rejected": -1.796431541442871,
273
+ "logps/chosen": -96.94844818115234,
274
+ "logps/rejected": -107.52276611328125,
275
+ "loss": 0.6488,
276
+ "pred_label": 0.0,
277
+ "rewards/accuracies": 0.26875001192092896,
278
+ "rewards/chosen": -0.1620088815689087,
279
+ "rewards/margins": 0.12216176092624664,
280
+ "rewards/rejected": -0.28417062759399414,
281
+ "step": 140,
282
+ "use_label": 0.0
283
+ },
284
+ {
285
+ "epoch": 0.31,
286
+ "grad_norm": 1.7578125,
287
+ "learning_rate": 4.33440758555951e-06,
288
+ "logits/chosen": -1.7516326904296875,
289
+ "logits/rejected": -1.7187411785125732,
290
+ "logps/chosen": -78.70259857177734,
291
+ "logps/rejected": -104.34063720703125,
292
+ "loss": 0.6451,
293
+ "pred_label": 0.0,
294
+ "rewards/accuracies": 0.32499998807907104,
295
+ "rewards/chosen": -0.13555890321731567,
296
+ "rewards/margins": 0.22945857048034668,
297
+ "rewards/rejected": -0.36501747369766235,
298
+ "step": 150,
299
+ "use_label": 0.0
300
+ },
301
+ {
302
+ "epoch": 0.33,
303
+ "grad_norm": 2.640625,
304
+ "learning_rate": 4.205219043576955e-06,
305
+ "logits/chosen": -1.481575608253479,
306
+ "logits/rejected": -1.468014121055603,
307
+ "logps/chosen": -100.68672180175781,
308
+ "logps/rejected": -127.04164123535156,
309
+ "loss": 0.6442,
310
+ "pred_label": 0.0,
311
+ "rewards/accuracies": 0.29374998807907104,
312
+ "rewards/chosen": -0.36356669664382935,
313
+ "rewards/margins": 0.1327240914106369,
314
+ "rewards/rejected": -0.49629077315330505,
315
+ "step": 160,
316
+ "use_label": 0.0
317
+ },
318
+ {
319
+ "epoch": 0.36,
320
+ "grad_norm": 2.390625,
321
+ "learning_rate": 4.066889974440757e-06,
322
+ "logits/chosen": -0.9005377888679504,
323
+ "logits/rejected": -0.8864371180534363,
324
+ "logps/chosen": -85.81999206542969,
325
+ "logps/rejected": -110.4801254272461,
326
+ "loss": 0.6339,
327
+ "pred_label": 0.0,
328
+ "rewards/accuracies": 0.30000001192092896,
329
+ "rewards/chosen": -0.3031192421913147,
330
+ "rewards/margins": 0.1594724804162979,
331
+ "rewards/rejected": -0.4625917375087738,
332
+ "step": 170,
333
+ "use_label": 0.0
334
+ },
335
+ {
336
+ "epoch": 0.38,
337
+ "grad_norm": 2.78125,
338
+ "learning_rate": 3.92016186682789e-06,
339
+ "logits/chosen": -0.591436505317688,
340
+ "logits/rejected": -0.5489451885223389,
341
+ "logps/chosen": -103.7041015625,
342
+ "logps/rejected": -123.32816314697266,
343
+ "loss": 0.6554,
344
+ "pred_label": 0.0,
345
+ "rewards/accuracies": 0.3375000059604645,
346
+ "rewards/chosen": -0.40916457772254944,
347
+ "rewards/margins": 0.2612735629081726,
348
+ "rewards/rejected": -0.6704381108283997,
349
+ "step": 180,
350
+ "use_label": 0.0
351
+ },
352
+ {
353
+ "epoch": 0.4,
354
+ "grad_norm": 2.09375,
355
+ "learning_rate": 3.7658212309857576e-06,
356
+ "logits/chosen": -0.801749587059021,
357
+ "logits/rejected": -0.588916003704071,
358
+ "logps/chosen": -96.86283874511719,
359
+ "logps/rejected": -123.17811584472656,
360
+ "loss": 0.6508,
361
+ "pred_label": 0.0,
362
+ "rewards/accuracies": 0.3062500059604645,
363
+ "rewards/chosen": -0.37751203775405884,
364
+ "rewards/margins": 0.21026258170604706,
365
+ "rewards/rejected": -0.5877746343612671,
366
+ "step": 190,
367
+ "use_label": 0.0
368
+ },
369
+ {
370
+ "epoch": 0.42,
371
+ "grad_norm": 1.59375,
372
+ "learning_rate": 3.604695382782159e-06,
373
+ "logits/chosen": -1.114527940750122,
374
+ "logits/rejected": -1.0130901336669922,
375
+ "logps/chosen": -111.54571533203125,
376
+ "logps/rejected": -115.97926330566406,
377
+ "loss": 0.6446,
378
+ "pred_label": 0.0,
379
+ "rewards/accuracies": 0.3187499940395355,
380
+ "rewards/chosen": -0.2986941933631897,
381
+ "rewards/margins": 0.1296522319316864,
382
+ "rewards/rejected": -0.4283464550971985,
383
+ "step": 200,
384
+ "use_label": 0.0
385
+ },
386
+ {
387
+ "epoch": 0.42,
388
+ "eval_logits/chosen": -0.7123901844024658,
389
+ "eval_logits/rejected": -0.6864092350006104,
390
+ "eval_logps/chosen": -92.6377182006836,
391
+ "eval_logps/rejected": -130.9503173828125,
392
+ "eval_loss": 0.6342783570289612,
393
+ "eval_pred_label": 0.0,
394
+ "eval_rewards/accuracies": 0.3828125,
395
+ "eval_rewards/chosen": -0.28725457191467285,
396
+ "eval_rewards/margins": 0.250319242477417,
397
+ "eval_rewards/rejected": -0.5375738143920898,
398
+ "eval_runtime": 125.6586,
399
+ "eval_samples_per_second": 15.916,
400
+ "eval_steps_per_second": 0.255,
401
+ "eval_use_label": 0.0,
402
+ "step": 200
403
+ },
404
+ {
405
+ "epoch": 0.44,
406
+ "grad_norm": 2.140625,
407
+ "learning_rate": 3.437648009023905e-06,
408
+ "logits/chosen": -0.6364002227783203,
409
+ "logits/rejected": -0.629191517829895,
410
+ "logps/chosen": -79.12034606933594,
411
+ "logps/rejected": -109.35395812988281,
412
+ "loss": 0.6319,
413
+ "pred_label": 0.0,
414
+ "rewards/accuracies": 0.35624998807907104,
415
+ "rewards/chosen": -0.23145589232444763,
416
+ "rewards/margins": 0.2322908192873001,
417
+ "rewards/rejected": -0.46374672651290894,
418
+ "step": 210,
419
+ "use_label": 0.0
420
+ },
421
+ {
422
+ "epoch": 0.46,
423
+ "grad_norm": 2.453125,
424
+ "learning_rate": 3.265574537815398e-06,
425
+ "logits/chosen": -0.24914255738258362,
426
+ "logits/rejected": -0.12895795702934265,
427
+ "logps/chosen": -123.09925842285156,
428
+ "logps/rejected": -127.96968078613281,
429
+ "loss": 0.633,
430
+ "pred_label": 0.0,
431
+ "rewards/accuracies": 0.3187499940395355,
432
+ "rewards/chosen": -0.43470579385757446,
433
+ "rewards/margins": 0.1813107430934906,
434
+ "rewards/rejected": -0.6160165071487427,
435
+ "step": 220,
436
+ "use_label": 0.0
437
+ },
438
+ {
439
+ "epoch": 0.48,
440
+ "grad_norm": 2.734375,
441
+ "learning_rate": 3.089397338773569e-06,
442
+ "logits/chosen": 0.08423249423503876,
443
+ "logits/rejected": 0.1725344955921173,
444
+ "logps/chosen": -98.91605377197266,
445
+ "logps/rejected": -125.9875259399414,
446
+ "loss": 0.6278,
447
+ "pred_label": 0.0,
448
+ "rewards/accuracies": 0.33125001192092896,
449
+ "rewards/chosen": -0.3448147773742676,
450
+ "rewards/margins": 0.287472665309906,
451
+ "rewards/rejected": -0.6322874426841736,
452
+ "step": 230,
453
+ "use_label": 0.0
454
+ },
455
+ {
456
+ "epoch": 0.5,
457
+ "grad_norm": 2.015625,
458
+ "learning_rate": 2.9100607788275547e-06,
459
+ "logits/chosen": 0.48232460021972656,
460
+ "logits/rejected": 0.39376580715179443,
461
+ "logps/chosen": -108.98759460449219,
462
+ "logps/rejected": -142.29344177246094,
463
+ "loss": 0.6294,
464
+ "pred_label": 0.0,
465
+ "rewards/accuracies": 0.3812499940395355,
466
+ "rewards/chosen": -0.39955058693885803,
467
+ "rewards/margins": 0.28114694356918335,
468
+ "rewards/rejected": -0.680697500705719,
469
+ "step": 240,
470
+ "use_label": 0.0
471
+ },
472
+ {
473
+ "epoch": 0.52,
474
+ "grad_norm": 2.25,
475
+ "learning_rate": 2.72852616010567e-06,
476
+ "logits/chosen": 0.35806649923324585,
477
+ "logits/rejected": 0.41671887040138245,
478
+ "logps/chosen": -126.65348052978516,
479
+ "logps/rejected": -151.3179168701172,
480
+ "loss": 0.6419,
481
+ "pred_label": 0.0,
482
+ "rewards/accuracies": 0.36250001192092896,
483
+ "rewards/chosen": -0.5325437784194946,
484
+ "rewards/margins": 0.28831106424331665,
485
+ "rewards/rejected": -0.8208548426628113,
486
+ "step": 250,
487
+ "use_label": 0.0
488
+ },
489
+ {
490
+ "epoch": 0.54,
491
+ "grad_norm": 2.46875,
492
+ "learning_rate": 2.5457665670441937e-06,
493
+ "logits/chosen": 0.4644729197025299,
494
+ "logits/rejected": 0.45051756501197815,
495
+ "logps/chosen": -110.62007904052734,
496
+ "logps/rejected": -142.76722717285156,
497
+ "loss": 0.6232,
498
+ "pred_label": 0.0,
499
+ "rewards/accuracies": 0.32499998807907104,
500
+ "rewards/chosen": -0.4451447129249573,
501
+ "rewards/margins": 0.2380482256412506,
502
+ "rewards/rejected": -0.6831929087638855,
503
+ "step": 260,
504
+ "use_label": 0.0
505
+ },
506
+ {
507
+ "epoch": 0.57,
508
+ "grad_norm": 2.4375,
509
+ "learning_rate": 2.3627616503391813e-06,
510
+ "logits/chosen": 0.6336380839347839,
511
+ "logits/rejected": 0.5556719303131104,
512
+ "logps/chosen": -116.7416000366211,
513
+ "logps/rejected": -135.33096313476562,
514
+ "loss": 0.6174,
515
+ "pred_label": 0.0,
516
+ "rewards/accuracies": 0.3375000059604645,
517
+ "rewards/chosen": -0.43825817108154297,
518
+ "rewards/margins": 0.22129836678504944,
519
+ "rewards/rejected": -0.65955650806427,
520
+ "step": 270,
521
+ "use_label": 0.0
522
+ },
523
+ {
524
+ "epoch": 0.59,
525
+ "grad_norm": 3.0625,
526
+ "learning_rate": 2.1804923757009885e-06,
527
+ "logits/chosen": 0.6383472681045532,
528
+ "logits/rejected": 0.7697634100914001,
529
+ "logps/chosen": -106.45858001708984,
530
+ "logps/rejected": -125.5028305053711,
531
+ "loss": 0.6353,
532
+ "pred_label": 0.0,
533
+ "rewards/accuracies": 0.30000001192092896,
534
+ "rewards/chosen": -0.4095306992530823,
535
+ "rewards/margins": 0.21630148589611053,
536
+ "rewards/rejected": -0.625832200050354,
537
+ "step": 280,
538
+ "use_label": 0.0
539
+ },
540
+ {
541
+ "epoch": 0.61,
542
+ "grad_norm": 3.328125,
543
+ "learning_rate": 1.9999357655598894e-06,
544
+ "logits/chosen": 0.1407470554113388,
545
+ "logits/rejected": 0.12877413630485535,
546
+ "logps/chosen": -108.0340805053711,
547
+ "logps/rejected": -136.49562072753906,
548
+ "loss": 0.6265,
549
+ "pred_label": 0.0,
550
+ "rewards/accuracies": 0.30000001192092896,
551
+ "rewards/chosen": -0.41485634446144104,
552
+ "rewards/margins": 0.18648667633533478,
553
+ "rewards/rejected": -0.601343035697937,
554
+ "step": 290,
555
+ "use_label": 0.0
556
+ },
557
+ {
558
+ "epoch": 0.63,
559
+ "grad_norm": 3.03125,
560
+ "learning_rate": 1.8220596619089576e-06,
561
+ "logits/chosen": 0.4002162516117096,
562
+ "logits/rejected": 0.25351682305336,
563
+ "logps/chosen": -127.95108795166016,
564
+ "logps/rejected": -172.98793029785156,
565
+ "loss": 0.6273,
566
+ "pred_label": 0.0,
567
+ "rewards/accuracies": 0.4124999940395355,
568
+ "rewards/chosen": -0.5035675168037415,
569
+ "rewards/margins": 0.2851078510284424,
570
+ "rewards/rejected": -0.7886753678321838,
571
+ "step": 300,
572
+ "use_label": 0.0
573
+ },
574
+ {
575
+ "epoch": 0.63,
576
+ "eval_logits/chosen": 0.6280341148376465,
577
+ "eval_logits/rejected": 0.6725929379463196,
578
+ "eval_logps/chosen": -110.14692687988281,
579
+ "eval_logps/rejected": -157.1332244873047,
580
+ "eval_loss": 0.620426595211029,
581
+ "eval_pred_label": 0.0,
582
+ "eval_rewards/accuracies": 0.3671875,
583
+ "eval_rewards/chosen": -0.46234679222106934,
584
+ "eval_rewards/margins": 0.33705610036849976,
585
+ "eval_rewards/rejected": -0.7994028329849243,
586
+ "eval_runtime": 125.7299,
587
+ "eval_samples_per_second": 15.907,
588
+ "eval_steps_per_second": 0.255,
589
+ "eval_use_label": 0.0,
590
+ "step": 300
591
+ },
592
+ {
593
+ "epoch": 0.65,
594
+ "grad_norm": 2.390625,
595
+ "learning_rate": 1.647817538357072e-06,
596
+ "logits/chosen": 0.33872538805007935,
597
+ "logits/rejected": 0.3415250778198242,
598
+ "logps/chosen": -95.08795166015625,
599
+ "logps/rejected": -142.95713806152344,
600
+ "loss": 0.6014,
601
+ "pred_label": 0.0,
602
+ "rewards/accuracies": 0.375,
603
+ "rewards/chosen": -0.39491352438926697,
604
+ "rewards/margins": 0.35215410590171814,
605
+ "rewards/rejected": -0.7470676302909851,
606
+ "step": 310,
607
+ "use_label": 0.0
608
+ },
609
+ {
610
+ "epoch": 0.67,
611
+ "grad_norm": 2.546875,
612
+ "learning_rate": 1.4781433892011132e-06,
613
+ "logits/chosen": 0.2642754018306732,
614
+ "logits/rejected": 0.4063233435153961,
615
+ "logps/chosen": -131.07791137695312,
616
+ "logps/rejected": -164.12667846679688,
617
+ "loss": 0.6133,
618
+ "pred_label": 0.0,
619
+ "rewards/accuracies": 0.38749998807907104,
620
+ "rewards/chosen": -0.580074667930603,
621
+ "rewards/margins": 0.38923436403274536,
622
+ "rewards/rejected": -0.9693089723587036,
623
+ "step": 320,
624
+ "use_label": 0.0
625
+ },
626
+ {
627
+ "epoch": 0.69,
628
+ "grad_norm": 3.15625,
629
+ "learning_rate": 1.3139467229135999e-06,
630
+ "logits/chosen": 0.5224499106407166,
631
+ "logits/rejected": 0.5213581919670105,
632
+ "logps/chosen": -130.00186157226562,
633
+ "logps/rejected": -156.6516876220703,
634
+ "loss": 0.6387,
635
+ "pred_label": 0.0,
636
+ "rewards/accuracies": 0.3375000059604645,
637
+ "rewards/chosen": -0.5989372134208679,
638
+ "rewards/margins": 0.2814994752407074,
639
+ "rewards/rejected": -0.8804367184638977,
640
+ "step": 330,
641
+ "use_label": 0.0
642
+ },
643
+ {
644
+ "epoch": 0.71,
645
+ "grad_norm": 2.28125,
646
+ "learning_rate": 1.1561076868822756e-06,
647
+ "logits/chosen": 0.1671726554632187,
648
+ "logits/rejected": 0.0974355936050415,
649
+ "logps/chosen": -140.3222198486328,
650
+ "logps/rejected": -155.46217346191406,
651
+ "loss": 0.6252,
652
+ "pred_label": 0.0,
653
+ "rewards/accuracies": 0.3375000059604645,
654
+ "rewards/chosen": -0.5558302998542786,
655
+ "rewards/margins": 0.23368898034095764,
656
+ "rewards/rejected": -0.7895193099975586,
657
+ "step": 340,
658
+ "use_label": 0.0
659
+ },
660
+ {
661
+ "epoch": 0.73,
662
+ "grad_norm": 3.328125,
663
+ "learning_rate": 1.0054723495346484e-06,
664
+ "logits/chosen": 0.081739641726017,
665
+ "logits/rejected": 0.08175826817750931,
666
+ "logps/chosen": -150.41506958007812,
667
+ "logps/rejected": -178.51565551757812,
668
+ "loss": 0.6231,
669
+ "pred_label": 0.0,
670
+ "rewards/accuracies": 0.375,
671
+ "rewards/chosen": -0.6099845170974731,
672
+ "rewards/margins": 0.322490930557251,
673
+ "rewards/rejected": -0.9324753880500793,
674
+ "step": 350,
675
+ "use_label": 0.0
676
+ },
677
+ {
678
+ "epoch": 0.75,
679
+ "grad_norm": 1.875,
680
+ "learning_rate": 8.628481651367876e-07,
681
+ "logits/chosen": 0.12279005348682404,
682
+ "logits/rejected": 0.20824797451496124,
683
+ "logps/chosen": -110.51042175292969,
684
+ "logps/rejected": -153.92698669433594,
685
+ "loss": 0.6186,
686
+ "pred_label": 0.0,
687
+ "rewards/accuracies": 0.3687500059604645,
688
+ "rewards/chosen": -0.46872806549072266,
689
+ "rewards/margins": 0.3482593894004822,
690
+ "rewards/rejected": -0.8169875144958496,
691
+ "step": 360,
692
+ "use_label": 0.0
693
+ },
694
+ {
695
+ "epoch": 0.77,
696
+ "grad_norm": 2.15625,
697
+ "learning_rate": 7.289996455765749e-07,
698
+ "logits/chosen": 0.19759848713874817,
699
+ "logits/rejected": 0.29472407698631287,
700
+ "logps/chosen": -103.1863021850586,
701
+ "logps/rejected": -143.578125,
702
+ "loss": 0.6166,
703
+ "pred_label": 0.0,
704
+ "rewards/accuracies": 0.3687500059604645,
705
+ "rewards/chosen": -0.37751519680023193,
706
+ "rewards/margins": 0.37911203503608704,
707
+ "rewards/rejected": -0.7566272020339966,
708
+ "step": 370,
709
+ "use_label": 0.0
710
+ },
711
+ {
712
+ "epoch": 0.8,
713
+ "grad_norm": 1.96875,
714
+ "learning_rate": 6.046442623320145e-07,
715
+ "logits/chosen": 0.03893072158098221,
716
+ "logits/rejected": 0.019468214362859726,
717
+ "logps/chosen": -108.17799377441406,
718
+ "logps/rejected": -158.08056640625,
719
+ "loss": 0.6183,
720
+ "pred_label": 0.0,
721
+ "rewards/accuracies": 0.3187499940395355,
722
+ "rewards/chosen": -0.42342591285705566,
723
+ "rewards/margins": 0.2937392592430115,
724
+ "rewards/rejected": -0.7171651124954224,
725
+ "step": 380,
726
+ "use_label": 0.0
727
+ },
728
+ {
729
+ "epoch": 0.82,
730
+ "grad_norm": 2.59375,
731
+ "learning_rate": 4.904486005914027e-07,
732
+ "logits/chosen": 0.33429718017578125,
733
+ "logits/rejected": 0.08158789575099945,
734
+ "logps/chosen": -151.29055786132812,
735
+ "logps/rejected": -180.48861694335938,
736
+ "loss": 0.6114,
737
+ "pred_label": 0.0,
738
+ "rewards/accuracies": 0.39375001192092896,
739
+ "rewards/chosen": -0.5847219824790955,
740
+ "rewards/margins": 0.3904651999473572,
741
+ "rewards/rejected": -0.9751871824264526,
742
+ "step": 390,
743
+ "use_label": 0.0
744
+ },
745
+ {
746
+ "epoch": 0.84,
747
+ "grad_norm": 2.015625,
748
+ "learning_rate": 3.8702478614051353e-07,
749
+ "logits/chosen": 0.126608207821846,
750
+ "logits/rejected": 0.2576550841331482,
751
+ "logps/chosen": -109.39167785644531,
752
+ "logps/rejected": -134.27053833007812,
753
+ "loss": 0.6165,
754
+ "pred_label": 0.0,
755
+ "rewards/accuracies": 0.38749998807907104,
756
+ "rewards/chosen": -0.36900678277015686,
757
+ "rewards/margins": 0.3390708863735199,
758
+ "rewards/rejected": -0.708077609539032,
759
+ "step": 400,
760
+ "use_label": 0.0
761
+ },
762
+ {
763
+ "epoch": 0.84,
764
+ "eval_logits/chosen": 0.903490424156189,
765
+ "eval_logits/rejected": 0.958048939704895,
766
+ "eval_logps/chosen": -108.47840881347656,
767
+ "eval_logps/rejected": -158.4149169921875,
768
+ "eval_loss": 0.6182093620300293,
769
+ "eval_pred_label": 0.0,
770
+ "eval_rewards/accuracies": 0.3671875,
771
+ "eval_rewards/chosen": -0.4456615447998047,
772
+ "eval_rewards/margins": 0.3665582537651062,
773
+ "eval_rewards/rejected": -0.8122197389602661,
774
+ "eval_runtime": 125.7278,
775
+ "eval_samples_per_second": 15.907,
776
+ "eval_steps_per_second": 0.255,
777
+ "eval_use_label": 0.0,
778
+ "step": 400
779
+ },
780
+ {
781
+ "epoch": 0.86,
782
+ "grad_norm": 2.046875,
783
+ "learning_rate": 2.9492720416985004e-07,
784
+ "logits/chosen": 0.39335688948631287,
785
+ "logits/rejected": 0.41703349351882935,
786
+ "logps/chosen": -106.9058837890625,
787
+ "logps/rejected": -138.57296752929688,
788
+ "loss": 0.6272,
789
+ "pred_label": 0.0,
790
+ "rewards/accuracies": 0.36250001192092896,
791
+ "rewards/chosen": -0.45482879877090454,
792
+ "rewards/margins": 0.3337084650993347,
793
+ "rewards/rejected": -0.788537323474884,
794
+ "step": 410,
795
+ "use_label": 0.0
796
+ },
797
+ {
798
+ "epoch": 0.88,
799
+ "grad_norm": 2.078125,
800
+ "learning_rate": 2.1464952759020857e-07,
801
+ "logits/chosen": 0.5264393091201782,
802
+ "logits/rejected": 0.4952784478664398,
803
+ "logps/chosen": -104.27522277832031,
804
+ "logps/rejected": -112.507080078125,
805
+ "loss": 0.6235,
806
+ "pred_label": 0.0,
807
+ "rewards/accuracies": 0.2750000059604645,
808
+ "rewards/chosen": -0.4333609640598297,
809
+ "rewards/margins": 0.1778794825077057,
810
+ "rewards/rejected": -0.6112405061721802,
811
+ "step": 420,
812
+ "use_label": 0.0
813
+ },
814
+ {
815
+ "epoch": 0.9,
816
+ "grad_norm": 1.734375,
817
+ "learning_rate": 1.4662207078575685e-07,
818
+ "logits/chosen": 0.47332754731178284,
819
+ "logits/rejected": 0.4613571763038635,
820
+ "logps/chosen": -144.65744018554688,
821
+ "logps/rejected": -170.08921813964844,
822
+ "loss": 0.5988,
823
+ "pred_label": 0.0,
824
+ "rewards/accuracies": 0.45625001192092896,
825
+ "rewards/chosen": -0.4539059102535248,
826
+ "rewards/margins": 0.4534150958061218,
827
+ "rewards/rejected": -0.9073210954666138,
828
+ "step": 430,
829
+ "use_label": 0.0
830
+ },
831
+ {
832
+ "epoch": 0.92,
833
+ "grad_norm": 1.9609375,
834
+ "learning_rate": 9.120948298936422e-08,
835
+ "logits/chosen": 0.48202329874038696,
836
+ "logits/rejected": 0.6259401440620422,
837
+ "logps/chosen": -114.15118408203125,
838
+ "logps/rejected": -161.5361785888672,
839
+ "loss": 0.6098,
840
+ "pred_label": 0.0,
841
+ "rewards/accuracies": 0.36250001192092896,
842
+ "rewards/chosen": -0.4724721908569336,
843
+ "rewards/margins": 0.39225998520851135,
844
+ "rewards/rejected": -0.8647321462631226,
845
+ "step": 440,
846
+ "use_label": 0.0
847
+ },
848
+ {
849
+ "epoch": 0.94,
850
+ "grad_norm": 2.265625,
851
+ "learning_rate": 4.870879364444109e-08,
852
+ "logits/chosen": 0.8100695610046387,
853
+ "logits/rejected": 0.5903851389884949,
854
+ "logps/chosen": -126.81998443603516,
855
+ "logps/rejected": -174.6106719970703,
856
+ "loss": 0.6122,
857
+ "pred_label": 0.0,
858
+ "rewards/accuracies": 0.36250001192092896,
859
+ "rewards/chosen": -0.5456215739250183,
860
+ "rewards/margins": 0.3175886273384094,
861
+ "rewards/rejected": -0.8632103204727173,
862
+ "step": 450,
863
+ "use_label": 0.0
864
+ },
865
+ {
866
+ "epoch": 0.96,
867
+ "grad_norm": 2.140625,
868
+ "learning_rate": 1.93478202307823e-08,
869
+ "logits/chosen": 0.7001665830612183,
870
+ "logits/rejected": 0.7000536322593689,
871
+ "logps/chosen": -80.71357727050781,
872
+ "logps/rejected": -126.110595703125,
873
+ "loss": 0.6182,
874
+ "pred_label": 0.0,
875
+ "rewards/accuracies": 0.32499998807907104,
876
+ "rewards/chosen": -0.3459371328353882,
877
+ "rewards/margins": 0.2817174792289734,
878
+ "rewards/rejected": -0.6276546716690063,
879
+ "step": 460,
880
+ "use_label": 0.0
881
+ },
882
+ {
883
+ "epoch": 0.98,
884
+ "grad_norm": 2.78125,
885
+ "learning_rate": 3.283947088983663e-09,
886
+ "logits/chosen": 0.7130995392799377,
887
+ "logits/rejected": 0.5145190954208374,
888
+ "logps/chosen": -110.40830993652344,
889
+ "logps/rejected": -137.49429321289062,
890
+ "loss": 0.6251,
891
+ "pred_label": 0.0,
892
+ "rewards/accuracies": 0.3125,
893
+ "rewards/chosen": -0.43079155683517456,
894
+ "rewards/margins": 0.25358152389526367,
895
+ "rewards/rejected": -0.6843730211257935,
896
+ "step": 470,
897
+ "use_label": 0.0
898
+ },
899
+ {
900
+ "epoch": 1.0,
901
+ "step": 477,
902
+ "total_flos": 0.0,
903
+ "train_loss": 0.6389844682481554,
904
+ "train_runtime": 9615.2592,
905
+ "train_samples_per_second": 6.358,
906
+ "train_steps_per_second": 0.05
907
+ }
908
+ ],
909
+ "logging_steps": 10,
910
+ "max_steps": 477,
911
+ "num_input_tokens_seen": 0,
912
+ "num_train_epochs": 1,
913
+ "save_steps": 50,
914
+ "total_flos": 0.0,
915
+ "train_batch_size": 4,
916
+ "trial_name": null,
917
+ "trial_params": null
918
+ }
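
The log_history above interleaves per-10-step training entries with per-100-step evaluation entries (the same four evaluation rows summarized in the README table). A short sketch for pulling them apart once trainer_state.json has been downloaded; the local filename is the only assumption:

```python
# Sketch: split trainer_state.json's log_history into train and eval records
# and print the evaluation trace that the README's results table summarizes.
import json

with open("trainer_state.json") as f:  # assumed local path
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e and "eval_loss" not in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

for e in eval_logs:
    print(e["step"], round(e["eval_loss"], 4), round(e["eval_rewards/margins"], 4))
```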