RikkiXu committed on
Commit d0048c4
1 Parent(s): c3149c4

Model save

README.md CHANGED
@@ -1,16 +1,8 @@
  ---
- license: apache-2.0
- base_model: alignment-handbook/zephyr-7b-sft-full
  tags:
- - alignment-handbook
  - trl
  - dpo
  - generated_from_trainer
- - trl
- - dpo
- - generated_from_trainer
- datasets:
- - HuggingFaceH4/ultrafeedback_binarized
  model-index:
  - name: zephyr-7b-dpo-full
  results: []
@@ -21,17 +13,17 @@ should probably proofread and complete it, then remove this comment. -->
 
  # zephyr-7b-dpo-full
 
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the HuggingFaceH4/ultrafeedback_binarized dataset.
+ This model was trained from scratch on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.5352
- - Rewards/chosen: 0.5766
- - Rewards/rejected: -0.3207
- - Rewards/accuracies: 0.7617
- - Rewards/margins: 0.8972
- - Logps/rejected: -269.0807
- - Logps/chosen: -251.0624
- - Logits/rejected: -2.4374
- - Logits/chosen: -2.4784
+ - Loss: 1.8979
+ - Rewards/chosen: -6.9869
+ - Rewards/rejected: -8.4701
+ - Rewards/accuracies: 0.6094
+ - Rewards/margins: 1.4832
+ - Logps/rejected: -1164.5387
+ - Logps/chosen: -1010.4669
+ - Logits/rejected: -0.5643
+ - Logits/chosen: -0.7199
 
  ## Model description
 
@@ -68,10 +60,15 @@ The following hyperparameters were used during training:
 
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
- | 0.565 | 0.21 | 100 | 0.5718 | 0.5950 | -0.0056 | 0.7383 | 0.6006 | -262.7792 | -250.6930 | -2.5105 | -2.5504 |
- | 0.5467 | 0.42 | 200 | 0.5433 | 0.6478 | -0.1115 | 0.7461 | 0.7594 | -264.8979 | -249.6371 | -2.4783 | -2.5179 |
- | 0.517 | 0.63 | 300 | 0.5370 | 0.5686 | -0.2689 | 0.7695 | 0.8374 | -268.0445 | -251.2220 | -2.5203 | -2.5623 |
- | 0.518 | 0.84 | 400 | 0.5348 | 0.6286 | -0.2212 | 0.7539 | 0.8498 | -267.0915 | -250.0218 | -2.4324 | -2.4731 |
+ | 0.2555 | 0.1 | 100 | 1.4172 | -4.8884 | -5.6701 | 0.5898 | 0.7817 | -884.5335 | -800.6121 | -1.3358 | -1.3942 |
+ | 0.1854 | 0.21 | 200 | 1.6754 | -6.1508 | -7.3259 | 0.6211 | 1.1752 | -1050.1200 | -926.8517 | -1.1088 | -1.1853 |
+ | 0.1799 | 0.31 | 300 | 1.5590 | -5.9157 | -6.9794 | 0.5977 | 1.0637 | -1015.4615 | -903.3419 | -1.0193 | -1.1110 |
+ | 0.1679 | 0.42 | 400 | 2.1030 | -7.8503 | -9.2060 | 0.6094 | 1.3557 | -1238.1252 | -1096.8108 | -0.5753 | -0.7096 |
+ | 0.1693 | 0.52 | 500 | 1.6563 | -6.3408 | -7.6718 | 0.625 | 1.3310 | -1084.7078 | -945.8611 | -0.8598 | -0.9873 |
+ | 0.1609 | 0.63 | 600 | 1.6818 | -6.4795 | -7.7992 | 0.6211 | 1.3198 | -1097.4480 | -959.7227 | -0.4515 | -0.6164 |
+ | 0.1559 | 0.73 | 700 | 1.9278 | -7.3485 | -8.7955 | 0.6133 | 1.4470 | -1197.0731 | -1046.6217 | -0.4166 | -0.5852 |
+ | 0.1433 | 0.84 | 800 | 1.9050 | -7.1496 | -8.6252 | 0.6172 | 1.4756 | -1180.0403 | -1026.7318 | -0.5141 | -0.6745 |
+ | 0.1479 | 0.94 | 900 | 1.8979 | -6.9869 | -8.4701 | 0.6094 | 1.4832 | -1164.5387 | -1010.4669 | -0.5643 | -0.7199 |
 
 
  ### Framework versions
all_results.json CHANGED
@@ -1,21 +1,8 @@
  {
  "epoch": 1.0,
- "eval_logits/chosen": -2.4783902168273926,
- "eval_logits/rejected": -2.4373888969421387,
- "eval_logps/chosen": -251.0624237060547,
- "eval_logps/rejected": -269.08074951171875,
- "eval_loss": 0.5351515412330627,
- "eval_rewards/accuracies": 0.76171875,
- "eval_rewards/chosen": 0.5765520930290222,
- "eval_rewards/margins": 0.8972306251525879,
- "eval_rewards/rejected": -0.3206784725189209,
- "eval_runtime": 96.1948,
- "eval_samples": 2000,
- "eval_samples_per_second": 20.791,
- "eval_steps_per_second": 0.333,
- "train_loss": 0.5478911828795238,
- "train_runtime": 7553.9268,
- "train_samples": 61134,
- "train_samples_per_second": 8.093,
- "train_steps_per_second": 0.063
+ "train_loss": 0.1961859940234279,
+ "train_runtime": 15468.9338,
+ "train_samples": 122270,
+ "train_samples_per_second": 7.904,
+ "train_steps_per_second": 0.062
  }
generation_config.json CHANGED
@@ -1,6 +1,6 @@
  {
  "_from_model_config": true,
  "bos_token_id": 1,
- "eos_token_id": 2,
+ "eos_token_id": 32000,
  "transformers_version": "4.38.2"
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4b668f8f887f1df175ea3fede818f9956dde3a4f82b012ed40e1f75f528510af
- size 4943162336
+ oid sha256:995180ab1bfccd03f3ed43211962300537543bf349fa09701bdfed2446682de0
+ size 4943178720
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5ab93f64cdd679c94fb82526772aa1ddff9407c1f21541bf00dadf1482df2e87
+ oid sha256:1bcfc7b87b51e023f9120c05ca16d79dc2363a9b1014b9e836f1d230e2035fc3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dabcfcdd26b2c41b402da15a5fc4afd601be2f77a064f1dacd450e2fbd05129f
- size 4540516344
+ oid sha256:93c22b80739cca4d51c122983391f1fc2fe409c9718974e8823aff56b6815dfe
+ size 4540532728
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
  {
  "metadata": {
- "total_size": 14483464192
+ "total_size": 14483496960
  },
  "weight_map": {
  "lm_head.weight": "model-00003-of-00003.safetensors",
runs/May09_18-46-08_n136-144-082/events.out.tfevents.1715252270.n136-144-082.2278777.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ccd9051f9116773434c18da461972c5355c7205a2dec6e5a1ad38752d7c43032
- size 74047
+ oid sha256:aa23ceecdb3712ec605ab90931ebf9878e5bbf973580c3b549d65cb4da191613
+ size 77841
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
  "epoch": 1.0,
- "train_loss": 0.5478911828795238,
- "train_runtime": 7553.9268,
- "train_samples": 61134,
- "train_samples_per_second": 8.093,
- "train_steps_per_second": 0.063
+ "train_loss": 0.1961859940234279,
+ "train_runtime": 15468.9338,
+ "train_samples": 122270,
+ "train_samples_per_second": 7.904,
+ "train_steps_per_second": 0.062
  }
trainer_state.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 100,
6
- "global_step": 478,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "grad_norm": 49.891043665102934,
14
- "learning_rate": 1.0416666666666666e-08,
15
- "logits/chosen": -2.7660439014434814,
16
- "logits/rejected": -2.717564582824707,
17
- "logps/chosen": -269.8568420410156,
18
- "logps/rejected": -360.52459716796875,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
@@ -23,787 +23,1587 @@
23
  "rewards/rejected": 0.0,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.02,
28
- "grad_norm": 46.946091297352105,
29
  "learning_rate": 1.0416666666666667e-07,
30
- "logits/chosen": -2.592543125152588,
31
- "logits/rejected": -2.56319522857666,
32
- "logps/chosen": -264.7040100097656,
33
- "logps/rejected": -251.515625,
34
- "loss": 0.6933,
35
- "rewards/accuracies": 0.4791666567325592,
36
- "rewards/chosen": 0.004693002440035343,
37
- "rewards/margins": 0.0028277651872485876,
38
- "rewards/rejected": 0.0018652371363714337,
39
- "step": 10
40
  },
41
  {
42
  "epoch": 0.04,
43
- "grad_norm": 41.817724108185395,
44
  "learning_rate": 2.0833333333333333e-07,
45
- "logits/chosen": -2.65449595451355,
46
- "logits/rejected": -2.6068952083587646,
47
- "logps/chosen": -280.5221252441406,
48
- "logps/rejected": -295.92376708984375,
49
- "loss": 0.689,
50
- "rewards/accuracies": 0.581250011920929,
51
- "rewards/chosen": 0.05156273767352104,
52
- "rewards/margins": 0.00740828737616539,
53
- "rewards/rejected": 0.04415445029735565,
54
- "step": 20
55
  },
56
  {
57
  "epoch": 0.06,
58
- "grad_norm": 39.81553425430633,
59
  "learning_rate": 3.1249999999999997e-07,
60
- "logits/chosen": -2.6671488285064697,
61
- "logits/rejected": -2.5955922603607178,
62
- "logps/chosen": -296.41644287109375,
63
- "logps/rejected": -260.6401672363281,
64
- "loss": 0.6733,
65
- "rewards/accuracies": 0.59375,
66
- "rewards/chosen": 0.2127685844898224,
67
- "rewards/margins": 0.04726782441139221,
68
- "rewards/rejected": 0.16550076007843018,
69
- "step": 30
70
  },
71
  {
72
  "epoch": 0.08,
73
- "grad_norm": 38.58454153774096,
74
  "learning_rate": 4.1666666666666667e-07,
75
- "logits/chosen": -2.5658886432647705,
76
- "logits/rejected": -2.5324325561523438,
77
- "logps/chosen": -259.78594970703125,
78
- "logps/rejected": -241.00991821289062,
79
- "loss": 0.6399,
80
- "rewards/accuracies": 0.7250000238418579,
81
- "rewards/chosen": 0.3669721484184265,
82
- "rewards/margins": 0.19786901772022247,
83
- "rewards/rejected": 0.16910310089588165,
84
- "step": 40
85
  },
86
  {
87
  "epoch": 0.1,
88
- "grad_norm": 37.351752662935816,
89
- "learning_rate": 4.999733114418725e-07,
90
- "logits/chosen": -2.5195257663726807,
91
- "logits/rejected": -2.4827651977539062,
92
- "logps/chosen": -273.65081787109375,
93
- "logps/rejected": -290.78680419921875,
94
- "loss": 0.6094,
95
- "rewards/accuracies": 0.6312500238418579,
96
- "rewards/chosen": 0.304054319858551,
97
- "rewards/margins": 0.2041884958744049,
98
- "rewards/rejected": 0.09986577928066254,
99
- "step": 50
100
  },
101
  {
102
  "epoch": 0.13,
103
- "grad_norm": 39.61129660699584,
104
- "learning_rate": 4.990398100856366e-07,
105
- "logits/chosen": -2.567991018295288,
106
- "logits/rejected": -2.5036864280700684,
107
- "logps/chosen": -260.38055419921875,
108
- "logps/rejected": -294.011474609375,
109
- "loss": 0.6013,
110
- "rewards/accuracies": 0.6937500238418579,
111
- "rewards/chosen": 0.5612996220588684,
112
- "rewards/margins": 0.3578048348426819,
113
- "rewards/rejected": 0.20349478721618652,
114
- "step": 60
115
  },
116
  {
117
  "epoch": 0.15,
118
- "grad_norm": 41.460696281749556,
119
- "learning_rate": 4.967775735898179e-07,
120
- "logits/chosen": -2.460195302963257,
121
- "logits/rejected": -2.46120023727417,
122
- "logps/chosen": -253.1399383544922,
123
- "logps/rejected": -253.4242706298828,
124
- "loss": 0.5693,
125
- "rewards/accuracies": 0.78125,
126
- "rewards/chosen": 0.6238263845443726,
127
- "rewards/margins": 0.4591788649559021,
128
- "rewards/rejected": 0.16464750468730927,
129
- "step": 70
130
  },
131
  {
132
  "epoch": 0.17,
133
- "grad_norm": 61.37030849441711,
134
- "learning_rate": 4.931986719649298e-07,
135
- "logits/chosen": -2.615948438644409,
136
- "logits/rejected": -2.5394978523254395,
137
- "logps/chosen": -311.7240295410156,
138
- "logps/rejected": -263.1805725097656,
139
- "loss": 0.5671,
140
- "rewards/accuracies": 0.6937500238418579,
141
- "rewards/chosen": 0.5546952486038208,
142
- "rewards/margins": 0.5107932686805725,
143
- "rewards/rejected": 0.04390193149447441,
144
- "step": 80
145
  },
146
  {
147
  "epoch": 0.19,
148
- "grad_norm": 39.59727717104598,
149
- "learning_rate": 4.883222001996351e-07,
150
- "logits/chosen": -2.5085294246673584,
151
- "logits/rejected": -2.4543616771698,
152
- "logps/chosen": -251.203369140625,
153
- "logps/rejected": -259.8647766113281,
154
- "loss": 0.5646,
155
- "rewards/accuracies": 0.7562500238418579,
156
- "rewards/chosen": 0.3953971564769745,
157
- "rewards/margins": 0.7687323689460754,
158
- "rewards/rejected": -0.37333518266677856,
159
- "step": 90
160
  },
161
  {
162
  "epoch": 0.21,
163
- "grad_norm": 36.57841721590594,
164
- "learning_rate": 4.821741763807186e-07,
165
- "logits/chosen": -2.499514102935791,
166
- "logits/rejected": -2.4649369716644287,
167
- "logps/chosen": -248.44363403320312,
168
- "logps/rejected": -257.64776611328125,
169
- "loss": 0.565,
170
- "rewards/accuracies": 0.731249988079071,
171
- "rewards/chosen": 0.5957446694374084,
172
- "rewards/margins": 0.6267115473747253,
173
- "rewards/rejected": -0.03096688725054264,
174
- "step": 100
175
  },
176
  {
177
  "epoch": 0.21,
178
- "eval_logits/chosen": -2.550398111343384,
179
- "eval_logits/rejected": -2.5104503631591797,
180
- "eval_logps/chosen": -250.69297790527344,
181
- "eval_logps/rejected": -262.7791748046875,
182
- "eval_loss": 0.5717624425888062,
183
- "eval_rewards/accuracies": 0.73828125,
184
- "eval_rewards/chosen": 0.5950239300727844,
185
- "eval_rewards/margins": 0.6006231904029846,
186
- "eval_rewards/rejected": -0.005599223077297211,
187
- "eval_runtime": 96.9486,
188
- "eval_samples_per_second": 20.629,
189
- "eval_steps_per_second": 0.33,
190
- "step": 100
191
  },
192
  {
193
  "epoch": 0.23,
194
- "grad_norm": 51.91494841998397,
195
- "learning_rate": 4.747874028753375e-07,
196
- "logits/chosen": -2.55851149559021,
197
- "logits/rejected": -2.4656014442443848,
198
- "logps/chosen": -292.62615966796875,
199
- "logps/rejected": -258.59661865234375,
200
- "loss": 0.5713,
201
- "rewards/accuracies": 0.75,
202
- "rewards/chosen": 0.5976964831352234,
203
- "rewards/margins": 0.6357330083847046,
204
- "rewards/rejected": -0.0380365327000618,
205
- "step": 110
206
  },
207
  {
208
  "epoch": 0.25,
209
- "grad_norm": 70.69069363258822,
210
- "learning_rate": 4.662012913161997e-07,
211
- "logits/chosen": -2.4600424766540527,
212
- "logits/rejected": -2.4324684143066406,
213
- "logps/chosen": -270.7308349609375,
214
- "logps/rejected": -260.5433349609375,
215
- "loss": 0.5497,
216
- "rewards/accuracies": 0.737500011920929,
217
- "rewards/chosen": 0.5218156576156616,
218
- "rewards/margins": 0.5561539530754089,
219
- "rewards/rejected": -0.03433822840452194,
220
- "step": 120
221
  },
222
  {
223
  "epoch": 0.27,
224
- "grad_norm": 42.312370253489476,
225
- "learning_rate": 4.5646165232345103e-07,
226
- "logits/chosen": -2.464791774749756,
227
- "logits/rejected": -2.439894676208496,
228
- "logps/chosen": -268.9382019042969,
229
- "logps/rejected": -269.9627685546875,
230
- "loss": 0.5423,
231
- "rewards/accuracies": 0.6875,
232
- "rewards/chosen": 0.6532469987869263,
233
- "rewards/margins": 0.7295945882797241,
234
- "rewards/rejected": -0.07634757459163666,
235
- "step": 130
236
  },
237
  {
238
  "epoch": 0.29,
239
- "grad_norm": 40.45859260542855,
240
- "learning_rate": 4.456204510851956e-07,
241
- "logits/chosen": -2.5265681743621826,
242
- "logits/rejected": -2.485774517059326,
243
- "logps/chosen": -303.1440124511719,
244
- "logps/rejected": -301.68914794921875,
245
- "loss": 0.5376,
246
- "rewards/accuracies": 0.768750011920929,
247
- "rewards/chosen": 0.6410696506500244,
248
- "rewards/margins": 0.6916864514350891,
249
- "rewards/rejected": -0.0506168007850647,
250
- "step": 140
251
  },
252
  {
253
  "epoch": 0.31,
254
- "grad_norm": 41.1747855806655,
255
- "learning_rate": 4.337355301007335e-07,
256
- "logits/chosen": -2.5189616680145264,
257
- "logits/rejected": -2.4531705379486084,
258
- "logps/chosen": -272.0736999511719,
259
- "logps/rejected": -276.2969055175781,
260
- "loss": 0.5442,
261
- "rewards/accuracies": 0.737500011920929,
262
- "rewards/chosen": 0.5575242042541504,
263
- "rewards/margins": 0.5619192719459534,
264
- "rewards/rejected": -0.004395070485770702,
265
- "step": 150
266
  },
267
  {
268
  "epoch": 0.33,
269
- "grad_norm": 48.726180323544725,
270
- "learning_rate": 4.2087030056579986e-07,
271
- "logits/chosen": -2.5174994468688965,
272
- "logits/rejected": -2.43558406829834,
273
- "logps/chosen": -260.0892028808594,
274
- "logps/rejected": -260.7149658203125,
275
- "loss": 0.5652,
276
- "rewards/accuracies": 0.737500011920929,
277
- "rewards/chosen": 0.6489425897598267,
278
- "rewards/margins": 0.8134964108467102,
279
- "rewards/rejected": -0.16455380618572235,
280
- "step": 160
281
  },
282
  {
283
  "epoch": 0.36,
284
- "grad_norm": 49.53825953706789,
285
- "learning_rate": 4.070934040463998e-07,
286
- "logits/chosen": -2.4509148597717285,
287
- "logits/rejected": -2.3897039890289307,
288
- "logps/chosen": -239.52261352539062,
289
- "logps/rejected": -233.6277618408203,
290
- "loss": 0.5489,
291
- "rewards/accuracies": 0.7250000238418579,
292
- "rewards/chosen": 0.49063143134117126,
293
- "rewards/margins": 0.6157802939414978,
294
- "rewards/rejected": -0.12514881789684296,
295
- "step": 170
296
  },
297
  {
298
  "epoch": 0.38,
299
- "grad_norm": 51.08561061111303,
300
- "learning_rate": 3.9247834624635404e-07,
301
- "logits/chosen": -2.3483898639678955,
302
- "logits/rejected": -2.306784152984619,
303
- "logps/chosen": -247.6396026611328,
304
- "logps/rejected": -231.8523406982422,
305
- "loss": 0.5181,
306
- "rewards/accuracies": 0.7250000238418579,
307
- "rewards/chosen": 0.43596941232681274,
308
- "rewards/margins": 0.6500319242477417,
309
- "rewards/rejected": -0.21406252682209015,
310
- "step": 180
311
  },
312
  {
313
  "epoch": 0.4,
314
- "grad_norm": 42.31027201995276,
315
- "learning_rate": 3.7710310482256523e-07,
316
- "logits/chosen": -2.41634464263916,
317
- "logits/rejected": -2.378105878829956,
318
- "logps/chosen": -260.20306396484375,
319
- "logps/rejected": -261.46502685546875,
320
- "loss": 0.5392,
321
- "rewards/accuracies": 0.7124999761581421,
322
- "rewards/chosen": 0.5519876480102539,
323
- "rewards/margins": 0.6375012993812561,
324
- "rewards/rejected": -0.08551368862390518,
325
- "step": 190
326
  },
327
  {
328
  "epoch": 0.42,
329
- "grad_norm": 102.86207924802177,
330
- "learning_rate": 3.610497133404795e-07,
331
- "logits/chosen": -2.392763614654541,
332
- "logits/rejected": -2.381993532180786,
333
- "logps/chosen": -249.912109375,
334
- "logps/rejected": -256.75439453125,
335
- "loss": 0.5467,
336
- "rewards/accuracies": 0.731249988079071,
337
- "rewards/chosen": 0.49922746419906616,
338
- "rewards/margins": 0.7344967126846313,
339
- "rewards/rejected": -0.235269233584404,
340
- "step": 200
341
  },
342
  {
343
  "epoch": 0.42,
344
- "eval_logits/chosen": -2.517864942550659,
345
- "eval_logits/rejected": -2.4783387184143066,
346
- "eval_logps/chosen": -249.6370849609375,
347
- "eval_logps/rejected": -264.89788818359375,
348
- "eval_loss": 0.5432960391044617,
349
- "eval_rewards/accuracies": 0.74609375,
350
- "eval_rewards/chosen": 0.6478186845779419,
351
- "eval_rewards/margins": 0.759353518486023,
352
- "eval_rewards/rejected": -0.11153475195169449,
353
- "eval_runtime": 96.4207,
354
- "eval_samples_per_second": 20.742,
355
- "eval_steps_per_second": 0.332,
356
- "step": 200
357
  },
358
  {
359
  "epoch": 0.44,
360
- "grad_norm": 45.308290366409736,
361
- "learning_rate": 3.4440382358952115e-07,
362
- "logits/chosen": -2.4460113048553467,
363
- "logits/rejected": -2.391810894012451,
364
- "logps/chosen": -278.56781005859375,
365
- "logps/rejected": -257.2254943847656,
366
- "loss": 0.5436,
367
- "rewards/accuracies": 0.731249988079071,
368
- "rewards/chosen": 0.5562152862548828,
369
- "rewards/margins": 0.8551079034805298,
370
- "rewards/rejected": -0.29889267683029175,
371
- "step": 210
372
  },
373
  {
374
  "epoch": 0.46,
375
- "grad_norm": 50.1182470431882,
376
- "learning_rate": 3.272542485937368e-07,
377
- "logits/chosen": -2.4605488777160645,
378
- "logits/rejected": -2.42708683013916,
379
- "logps/chosen": -257.90826416015625,
380
- "logps/rejected": -253.3182830810547,
381
- "loss": 0.54,
382
- "rewards/accuracies": 0.793749988079071,
383
- "rewards/chosen": 0.3734419643878937,
384
- "rewards/margins": 0.7561658024787903,
385
- "rewards/rejected": -0.382723867893219,
386
- "step": 220
387
  },
388
  {
389
  "epoch": 0.48,
390
- "grad_norm": 43.71024962971359,
391
- "learning_rate": 3.096924887558854e-07,
392
- "logits/chosen": -2.490509510040283,
393
- "logits/rejected": -2.4491913318634033,
394
- "logps/chosen": -237.17898559570312,
395
- "logps/rejected": -251.81686401367188,
396
- "loss": 0.5441,
397
- "rewards/accuracies": 0.7437499761581421,
398
- "rewards/chosen": 0.5843235850334167,
399
- "rewards/margins": 0.7882751226425171,
400
- "rewards/rejected": -0.20395155251026154,
401
- "step": 230
402
  },
403
  {
404
  "epoch": 0.5,
405
- "grad_norm": 44.93616969967234,
406
- "learning_rate": 2.9181224366319943e-07,
407
- "logits/chosen": -2.533695697784424,
408
- "logits/rejected": -2.500807285308838,
409
- "logps/chosen": -253.635498046875,
410
- "logps/rejected": -253.0944061279297,
411
- "loss": 0.5142,
412
- "rewards/accuracies": 0.699999988079071,
413
- "rewards/chosen": 0.5032340884208679,
414
- "rewards/margins": 0.7045356035232544,
415
- "rewards/rejected": -0.2013014256954193,
416
- "step": 240
417
  },
418
  {
419
  "epoch": 0.52,
420
- "grad_norm": 42.68904256130122,
421
- "learning_rate": 2.7370891215954565e-07,
422
- "logits/chosen": -2.483025074005127,
423
- "logits/rejected": -2.4015185832977295,
424
- "logps/chosen": -285.0963439941406,
425
- "logps/rejected": -263.43560791015625,
426
- "loss": 0.5198,
427
- "rewards/accuracies": 0.762499988079071,
428
- "rewards/chosen": 0.6547069549560547,
429
- "rewards/margins": 0.850358784198761,
430
- "rewards/rejected": -0.19565197825431824,
431
- "step": 250
432
  },
433
  {
434
  "epoch": 0.54,
435
- "grad_norm": 45.43502171857602,
436
- "learning_rate": 2.55479083351317e-07,
437
- "logits/chosen": -2.516913890838623,
438
- "logits/rejected": -2.478473424911499,
439
- "logps/chosen": -282.80230712890625,
440
- "logps/rejected": -258.77288818359375,
441
- "loss": 0.5235,
442
- "rewards/accuracies": 0.768750011920929,
443
- "rewards/chosen": 0.5754625797271729,
444
- "rewards/margins": 0.8150871396064758,
445
- "rewards/rejected": -0.23962458968162537,
446
- "step": 260
447
  },
448
  {
449
- "epoch": 0.56,
450
- "grad_norm": 41.73526734917468,
451
- "learning_rate": 2.3722002126275822e-07,
452
- "logits/chosen": -2.5381340980529785,
453
- "logits/rejected": -2.4941086769104004,
454
- "logps/chosen": -267.4333190917969,
455
- "logps/rejected": -260.50677490234375,
456
- "loss": 0.5406,
457
- "rewards/accuracies": 0.675000011920929,
458
- "rewards/chosen": 0.4781308174133301,
459
- "rewards/margins": 0.6212563514709473,
460
- "rewards/rejected": -0.14312560856342316,
461
- "step": 270
462
  },
463
  {
464
  "epoch": 0.59,
465
- "grad_norm": 48.561323508433155,
466
- "learning_rate": 2.19029145890313e-07,
467
- "logits/chosen": -2.510133743286133,
468
- "logits/rejected": -2.4422435760498047,
469
- "logps/chosen": -250.73855590820312,
470
- "logps/rejected": -247.487060546875,
471
- "loss": 0.5599,
472
- "rewards/accuracies": 0.737500011920929,
473
- "rewards/chosen": 0.5469261407852173,
474
- "rewards/margins": 0.8119627833366394,
475
- "rewards/rejected": -0.2650366425514221,
476
- "step": 280
477
  },
478
  {
479
  "epoch": 0.61,
480
- "grad_norm": 44.504093632124075,
481
- "learning_rate": 2.0100351342479216e-07,
482
- "logits/chosen": -2.5589568614959717,
483
- "logits/rejected": -2.5217483043670654,
484
- "logps/chosen": -240.7520751953125,
485
- "logps/rejected": -244.8422088623047,
486
- "loss": 0.5354,
487
- "rewards/accuracies": 0.7250000238418579,
488
- "rewards/chosen": 0.5167636871337891,
489
- "rewards/margins": 0.661081075668335,
490
- "rewards/rejected": -0.14431743323802948,
491
- "step": 290
492
  },
493
  {
494
  "epoch": 0.63,
495
- "grad_norm": 52.52022669231452,
496
- "learning_rate": 1.8323929841460178e-07,
497
- "logits/chosen": -2.5429511070251465,
498
- "logits/rejected": -2.472679376602173,
499
- "logps/chosen": -292.2240905761719,
500
- "logps/rejected": -266.68658447265625,
501
- "loss": 0.517,
502
- "rewards/accuracies": 0.731249988079071,
503
- "rewards/chosen": 0.4385985732078552,
504
- "rewards/margins": 0.7676541209220886,
505
- "rewards/rejected": -0.329055517911911,
506
- "step": 300
507
  },
508
  {
509
  "epoch": 0.63,
510
- "eval_logits/chosen": -2.5622596740722656,
511
- "eval_logits/rejected": -2.520256280899048,
512
- "eval_logps/chosen": -251.2219696044922,
513
- "eval_logps/rejected": -268.04449462890625,
514
- "eval_loss": 0.53697669506073,
515
- "eval_rewards/accuracies": 0.76953125,
516
- "eval_rewards/chosen": 0.5685745477676392,
517
- "eval_rewards/margins": 0.8374388217926025,
518
- "eval_rewards/rejected": -0.2688642740249634,
519
- "eval_runtime": 96.3678,
520
- "eval_samples_per_second": 20.754,
521
- "eval_steps_per_second": 0.332,
522
- "step": 300
523
  },
524
  {
525
  "epoch": 0.65,
526
- "grad_norm": 43.866661437938184,
527
- "learning_rate": 1.6583128063291573e-07,
528
- "logits/chosen": -2.4593474864959717,
529
- "logits/rejected": -2.443233013153076,
530
- "logps/chosen": -285.5498046875,
531
- "logps/rejected": -263.8379821777344,
532
- "loss": 0.5077,
533
- "rewards/accuracies": 0.768750011920929,
534
- "rewards/chosen": 0.4893050193786621,
535
- "rewards/margins": 0.7553777098655701,
536
- "rewards/rejected": -0.26607269048690796,
537
- "step": 310
538
  },
539
  {
540
  "epoch": 0.67,
541
- "grad_norm": 43.407860217947494,
542
- "learning_rate": 1.488723393865766e-07,
543
- "logits/chosen": -2.4746253490448,
544
- "logits/rejected": -2.4388270378112793,
545
- "logps/chosen": -283.4583740234375,
546
- "logps/rejected": -250.38204956054688,
547
- "loss": 0.504,
548
- "rewards/accuracies": 0.731249988079071,
549
- "rewards/chosen": 0.5105848908424377,
550
- "rewards/margins": 0.788524329662323,
551
- "rewards/rejected": -0.2779393792152405,
552
- "step": 320
553
  },
554
  {
555
  "epoch": 0.69,
556
- "grad_norm": 40.302692173545196,
557
- "learning_rate": 1.3245295796480788e-07,
558
- "logits/chosen": -2.4712371826171875,
559
- "logits/rejected": -2.4099698066711426,
560
- "logps/chosen": -252.349853515625,
561
- "logps/rejected": -264.03912353515625,
562
- "loss": 0.5242,
563
- "rewards/accuracies": 0.737500011920929,
564
- "rewards/chosen": 0.4930170178413391,
565
- "rewards/margins": 0.7200408577919006,
566
- "rewards/rejected": -0.2270239144563675,
567
- "step": 330
568
  },
569
  {
570
  "epoch": 0.71,
571
- "grad_norm": 50.168955016672676,
572
- "learning_rate": 1.1666074087171627e-07,
573
- "logits/chosen": -2.467729091644287,
574
- "logits/rejected": -2.4046943187713623,
575
- "logps/chosen": -278.697509765625,
576
- "logps/rejected": -285.4507141113281,
577
- "loss": 0.524,
578
- "rewards/accuracies": 0.7437499761581421,
579
- "rewards/chosen": 0.5648467540740967,
580
- "rewards/margins": 0.8352931141853333,
581
- "rewards/rejected": -0.2704463601112366,
582
- "step": 340
583
  },
584
  {
585
  "epoch": 0.73,
586
- "grad_norm": 46.15971070553052,
587
- "learning_rate": 1.0157994641835734e-07,
588
- "logits/chosen": -2.445666790008545,
589
- "logits/rejected": -2.377004384994507,
590
- "logps/chosen": -248.63241577148438,
591
- "logps/rejected": -248.23904418945312,
592
- "loss": 0.4924,
593
- "rewards/accuracies": 0.75,
594
- "rewards/chosen": 0.47894006967544556,
595
- "rewards/margins": 0.8554509878158569,
596
- "rewards/rejected": -0.37651100754737854,
597
- "step": 350
598
  },
599
  {
600
  "epoch": 0.75,
601
- "grad_norm": 54.17198760484943,
602
- "learning_rate": 8.729103716819111e-08,
603
- "logits/chosen": -2.4745380878448486,
604
- "logits/rejected": -2.376185178756714,
605
- "logps/chosen": -292.89483642578125,
606
- "logps/rejected": -269.1952209472656,
607
- "loss": 0.5388,
608
- "rewards/accuracies": 0.7562500238418579,
609
- "rewards/chosen": 0.4898607134819031,
610
- "rewards/margins": 0.8843740224838257,
611
- "rewards/rejected": -0.3945133090019226,
612
- "step": 360
613
  },
614
  {
615
  "epoch": 0.77,
616
- "grad_norm": 44.15468601338237,
617
- "learning_rate": 7.387025063449081e-08,
618
- "logits/chosen": -2.409170150756836,
619
- "logits/rejected": -2.367518186569214,
620
- "logps/chosen": -266.35430908203125,
621
- "logps/rejected": -242.5480194091797,
622
- "loss": 0.5384,
623
- "rewards/accuracies": 0.6875,
624
- "rewards/chosen": 0.44893550872802734,
625
- "rewards/margins": 0.6646324992179871,
626
- "rewards/rejected": -0.21569697558879852,
627
- "step": 370
628
  },
629
  {
630
- "epoch": 0.79,
631
- "grad_norm": 39.47383320898196,
632
- "learning_rate": 6.138919252022435e-08,
633
- "logits/chosen": -2.3523006439208984,
634
- "logits/rejected": -2.3420968055725098,
635
- "logps/chosen": -230.9795379638672,
636
- "logps/rejected": -267.8912658691406,
637
- "loss": 0.5181,
638
- "rewards/accuracies": 0.737500011920929,
639
- "rewards/chosen": 0.4252557158470154,
640
- "rewards/margins": 0.8839966058731079,
641
- "rewards/rejected": -0.45874080061912537,
642
- "step": 380
643
  },
644
  {
645
  "epoch": 0.82,
646
- "grad_norm": 48.64961363299689,
647
- "learning_rate": 4.991445467064689e-08,
648
- "logits/chosen": -2.4286305904388428,
649
- "logits/rejected": -2.394604206085205,
650
- "logps/chosen": -293.20440673828125,
651
- "logps/rejected": -287.0997009277344,
652
- "loss": 0.5149,
653
- "rewards/accuracies": 0.7124999761581421,
654
- "rewards/chosen": 0.6216251850128174,
655
- "rewards/margins": 0.7780872583389282,
656
- "rewards/rejected": -0.15646204352378845,
657
- "step": 390
658
  },
659
  {
660
  "epoch": 0.84,
661
- "grad_norm": 44.38080817079193,
662
- "learning_rate": 3.9507259776993954e-08,
663
- "logits/chosen": -2.4102301597595215,
664
- "logits/rejected": -2.3357295989990234,
665
- "logps/chosen": -259.7147521972656,
666
- "logps/rejected": -273.10699462890625,
667
- "loss": 0.518,
668
- "rewards/accuracies": 0.762499988079071,
669
- "rewards/chosen": 0.541024386882782,
670
- "rewards/margins": 0.8457515835762024,
671
- "rewards/rejected": -0.30472710728645325,
672
- "step": 400
673
  },
674
  {
675
  "epoch": 0.84,
676
- "eval_logits/chosen": -2.4731171131134033,
677
- "eval_logits/rejected": -2.4323782920837402,
678
- "eval_logps/chosen": -250.02178955078125,
679
- "eval_logps/rejected": -267.0915222167969,
680
- "eval_loss": 0.5348160862922668,
681
- "eval_rewards/accuracies": 0.75390625,
682
- "eval_rewards/chosen": 0.6285843849182129,
683
- "eval_rewards/margins": 0.8498014211654663,
684
- "eval_rewards/rejected": -0.22121697664260864,
685
- "eval_runtime": 96.4764,
686
- "eval_samples_per_second": 20.73,
687
- "eval_steps_per_second": 0.332,
688
- "step": 400
689
  },
690
  {
691
  "epoch": 0.86,
692
- "grad_norm": 48.15981350653104,
693
- "learning_rate": 3.022313472693447e-08,
694
- "logits/chosen": -2.444577932357788,
695
- "logits/rejected": -2.3699073791503906,
696
- "logps/chosen": -286.5138854980469,
697
- "logps/rejected": -274.3666687011719,
698
- "loss": 0.5226,
699
- "rewards/accuracies": 0.768750011920929,
700
- "rewards/chosen": 0.6201778650283813,
701
- "rewards/margins": 0.8976529240608215,
702
- "rewards/rejected": -0.2774750590324402,
703
- "step": 410
704
  },
705
  {
706
  "epoch": 0.88,
707
- "grad_norm": 48.573506313099124,
708
- "learning_rate": 2.2111614344599684e-08,
709
- "logits/chosen": -2.429912805557251,
710
- "logits/rejected": -2.3931796550750732,
711
- "logps/chosen": -287.13067626953125,
712
- "logps/rejected": -279.46844482421875,
713
- "loss": 0.5212,
714
- "rewards/accuracies": 0.675000011920929,
715
- "rewards/chosen": 0.4971562325954437,
716
- "rewards/margins": 0.7474662065505981,
717
- "rewards/rejected": -0.25030994415283203,
718
- "step": 420
719
  },
720
  {
721
  "epoch": 0.9,
722
- "grad_norm": 41.98038749926915,
723
- "learning_rate": 1.521597710086439e-08,
724
- "logits/chosen": -2.3573684692382812,
725
- "logits/rejected": -2.3092567920684814,
726
- "logps/chosen": -269.9436950683594,
727
- "logps/rejected": -265.4564514160156,
728
- "loss": 0.501,
729
- "rewards/accuracies": 0.7875000238418579,
730
- "rewards/chosen": 0.45472264289855957,
731
- "rewards/margins": 0.838543713092804,
732
- "rewards/rejected": -0.38382115960121155,
733
- "step": 430
734
  },
735
  {
736
  "epoch": 0.92,
737
- "grad_norm": 44.22650678163462,
738
- "learning_rate": 9.57301420397924e-09,
739
- "logits/chosen": -2.4332785606384277,
740
- "logits/rejected": -2.3776473999023438,
741
- "logps/chosen": -272.65960693359375,
742
- "logps/rejected": -271.44329833984375,
743
- "loss": 0.5213,
744
- "rewards/accuracies": 0.6875,
745
- "rewards/chosen": 0.5838757157325745,
746
- "rewards/margins": 0.700454831123352,
747
- "rewards/rejected": -0.1165790781378746,
748
- "step": 440
749
  },
750
  {
751
  "epoch": 0.94,
752
- "grad_norm": 43.00589727019739,
753
- "learning_rate": 5.212833302556258e-09,
754
- "logits/chosen": -2.3836779594421387,
755
- "logits/rejected": -2.360665798187256,
756
- "logps/chosen": -284.2134704589844,
757
- "logps/rejected": -312.9830627441406,
758
- "loss": 0.5099,
759
- "rewards/accuracies": 0.6812499761581421,
760
- "rewards/chosen": 0.4735330641269684,
761
- "rewards/margins": 0.7689631581306458,
762
- "rewards/rejected": -0.29543009400367737,
763
- "step": 450
764
  },
765
  {
766
  "epoch": 0.96,
767
- "grad_norm": 46.86754726240038,
768
- "learning_rate": 2.158697848236607e-09,
769
- "logits/chosen": -2.417273998260498,
770
- "logits/rejected": -2.377349376678467,
771
- "logps/chosen": -262.1804504394531,
772
- "logps/rejected": -247.7431182861328,
773
- "loss": 0.5264,
774
- "rewards/accuracies": 0.699999988079071,
775
- "rewards/chosen": 0.48920711874961853,
776
- "rewards/margins": 0.7419728636741638,
777
- "rewards/rejected": -0.2527657151222229,
778
- "step": 460
779
  },
780
  {
781
  "epoch": 0.98,
782
- "grad_norm": 45.055740606082026,
783
- "learning_rate": 4.269029751107489e-10,
784
- "logits/chosen": -2.4338390827178955,
785
- "logits/rejected": -2.3758208751678467,
786
- "logps/chosen": -268.4836730957031,
787
- "logps/rejected": -289.60205078125,
788
- "loss": 0.4974,
789
- "rewards/accuracies": 0.762499988079071,
790
- "rewards/chosen": 0.5107508897781372,
791
- "rewards/margins": 0.8732994794845581,
792
- "rewards/rejected": -0.3625485301017761,
793
- "step": 470
794
  },
795
  {
796
  "epoch": 1.0,
797
- "step": 478,
798
  "total_flos": 0.0,
799
- "train_loss": 0.5478911828795238,
800
- "train_runtime": 7553.9268,
801
- "train_samples_per_second": 8.093,
802
- "train_steps_per_second": 0.063
803
  }
804
  ],
805
  "logging_steps": 10,
806
- "max_steps": 478,
807
  "num_input_tokens_seen": 0,
808
  "num_train_epochs": 1,
809
  "save_steps": 100,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9994767137624281,
5
  "eval_steps": 100,
6
+ "global_step": 955,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "grad_norm": 50.348802908690914,
14
+ "learning_rate": 5.208333333333333e-09,
15
+ "logits/chosen": -1.8382859230041504,
16
+ "logits/rejected": -1.788834810256958,
17
+ "logps/chosen": -119.0692138671875,
18
+ "logps/rejected": -76.35714721679688,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
 
23
  "rewards/rejected": 0.0,
24
  "step": 1
25
  },
26
+ {
27
+ "epoch": 0.01,
28
+ "grad_norm": 46.56256136532365,
29
+ "learning_rate": 5.208333333333333e-08,
30
+ "logits/chosen": -1.6623330116271973,
31
+ "logits/rejected": -1.5512146949768066,
32
+ "logps/chosen": -129.29541015625,
33
+ "logps/rejected": -82.82817840576172,
34
+ "loss": 0.6931,
35
+ "rewards/accuracies": 0.4375,
36
+ "rewards/chosen": -0.00018554604321252555,
37
+ "rewards/margins": 0.00036421266850084066,
38
+ "rewards/rejected": -0.0005497586098499596,
39
+ "step": 10
40
+ },
41
  {
42
  "epoch": 0.02,
43
+ "grad_norm": 47.9759657771044,
44
  "learning_rate": 1.0416666666666667e-07,
45
+ "logits/chosen": -1.6969773769378662,
46
+ "logits/rejected": -1.5268709659576416,
47
+ "logps/chosen": -140.09909057617188,
48
+ "logps/rejected": -80.9607162475586,
49
+ "loss": 0.6866,
50
+ "rewards/accuracies": 0.7124999761581421,
51
+ "rewards/chosen": 0.013138635084033012,
52
+ "rewards/margins": 0.015350555069744587,
53
+ "rewards/rejected": -0.002211919752880931,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.03,
58
+ "grad_norm": 37.210502761575135,
59
+ "learning_rate": 1.5624999999999999e-07,
60
+ "logits/chosen": -1.7122924327850342,
61
+ "logits/rejected": -1.6340242624282837,
62
+ "logps/chosen": -119.13212585449219,
63
+ "logps/rejected": -84.83552551269531,
64
+ "loss": 0.657,
65
+ "rewards/accuracies": 0.862500011920929,
66
+ "rewards/chosen": 0.05337844043970108,
67
+ "rewards/margins": 0.0730198472738266,
68
+ "rewards/rejected": -0.01964140310883522,
69
+ "step": 30
70
  },
71
  {
72
  "epoch": 0.04,
73
+ "grad_norm": 38.29612001708921,
74
  "learning_rate": 2.0833333333333333e-07,
75
+ "logits/chosen": -1.8123095035552979,
76
+ "logits/rejected": -1.716663122177124,
77
+ "logps/chosen": -130.39593505859375,
78
+ "logps/rejected": -98.6071548461914,
79
+ "loss": 0.5915,
80
+ "rewards/accuracies": 0.90625,
81
+ "rewards/chosen": 0.1293005496263504,
82
+ "rewards/margins": 0.2863259017467499,
83
+ "rewards/rejected": -0.15702535212039948,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.05,
88
+ "grad_norm": 25.169270167372616,
89
+ "learning_rate": 2.604166666666667e-07,
90
+ "logits/chosen": -1.6766433715820312,
91
+ "logits/rejected": -1.6306852102279663,
92
+ "logps/chosen": -127.97342681884766,
93
+ "logps/rejected": -136.70712280273438,
94
+ "loss": 0.484,
95
+ "rewards/accuracies": 0.8812500238418579,
96
+ "rewards/chosen": 0.0137257669121027,
97
+ "rewards/margins": 0.6059251427650452,
98
+ "rewards/rejected": -0.5921992063522339,
99
+ "step": 50
100
  },
101
  {
102
  "epoch": 0.06,
103
+ "grad_norm": 26.650306729386905,
104
  "learning_rate": 3.1249999999999997e-07,
105
+ "logits/chosen": -1.6139761209487915,
106
+ "logits/rejected": -1.610743761062622,
107
+ "logps/chosen": -173.83758544921875,
108
+ "logps/rejected": -218.09707641601562,
109
+ "loss": 0.3946,
110
+ "rewards/accuracies": 0.887499988079071,
111
+ "rewards/chosen": -0.24665935337543488,
112
+ "rewards/margins": 1.0333704948425293,
113
+ "rewards/rejected": -1.280029535293579,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.07,
118
+ "grad_norm": 32.401140738769385,
119
+ "learning_rate": 3.645833333333333e-07,
120
+ "logits/chosen": -1.4975354671478271,
121
+ "logits/rejected": -1.4718661308288574,
122
+ "logps/chosen": -191.6230010986328,
123
+ "logps/rejected": -297.1890563964844,
124
+ "loss": 0.3474,
125
+ "rewards/accuracies": 0.84375,
126
+ "rewards/chosen": -0.6469367146492004,
127
+ "rewards/margins": 1.586439847946167,
128
+ "rewards/rejected": -2.2333762645721436,
129
+ "step": 70
130
  },
131
  {
132
  "epoch": 0.08,
133
+ "grad_norm": 31.488475681745037,
134
  "learning_rate": 4.1666666666666667e-07,
135
+ "logits/chosen": -1.4592185020446777,
136
+ "logits/rejected": -1.3416706323623657,
137
+ "logps/chosen": -249.4734649658203,
138
+ "logps/rejected": -405.06207275390625,
139
+ "loss": 0.2966,
140
+ "rewards/accuracies": 0.8812500238418579,
141
+ "rewards/chosen": -1.1794850826263428,
142
+ "rewards/margins": 2.0616517066955566,
143
+ "rewards/rejected": -3.2411365509033203,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 0.09,
148
+ "grad_norm": 34.896761047309674,
149
+ "learning_rate": 4.6874999999999996e-07,
150
+ "logits/chosen": -1.2935254573822021,
151
+ "logits/rejected": -1.215850591659546,
152
+ "logps/chosen": -236.42935180664062,
153
+ "logps/rejected": -427.88079833984375,
154
+ "loss": 0.2792,
155
+ "rewards/accuracies": 0.862500011920929,
156
+ "rewards/chosen": -1.0514061450958252,
157
+ "rewards/margins": 2.4284262657165527,
158
+ "rewards/rejected": -3.479832410812378,
159
+ "step": 90
160
  },
161
  {
162
  "epoch": 0.1,
163
+ "grad_norm": 40.95979792917679,
164
+ "learning_rate": 4.999732492681437e-07,
165
+ "logits/chosen": -1.425644040107727,
166
+ "logits/rejected": -1.2574630975723267,
167
+ "logps/chosen": -236.2029571533203,
168
+ "logps/rejected": -470.1878967285156,
169
+ "loss": 0.2555,
170
+ "rewards/accuracies": 0.893750011920929,
171
+ "rewards/chosen": -1.0542919635772705,
172
+ "rewards/margins": 2.8619871139526367,
173
+ "rewards/rejected": -3.916278839111328,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.1,
178
+ "eval_logits/chosen": -1.394241213798523,
179
+ "eval_logits/rejected": -1.3358198404312134,
180
+ "eval_logps/chosen": -800.6121215820312,
181
+ "eval_logps/rejected": -884.5335083007812,
182
+ "eval_loss": 1.4172171354293823,
183
+ "eval_rewards/accuracies": 0.58984375,
184
+ "eval_rewards/chosen": -4.888356685638428,
185
+ "eval_rewards/margins": 0.7817266583442688,
186
+ "eval_rewards/rejected": -5.670083045959473,
187
+ "eval_runtime": 97.8793,
188
+ "eval_samples_per_second": 20.433,
189
+ "eval_steps_per_second": 0.327,
190
+ "step": 100
191
+ },
192
+ {
193
+ "epoch": 0.12,
194
+ "grad_norm": 32.26210278499666,
195
+ "learning_rate": 4.996723692767926e-07,
196
+ "logits/chosen": -1.300011157989502,
197
+ "logits/rejected": -1.1855316162109375,
198
+ "logps/chosen": -253.3606719970703,
199
+ "logps/rejected": -546.6198120117188,
200
+ "loss": 0.2322,
201
+ "rewards/accuracies": 0.862500011920929,
202
+ "rewards/chosen": -1.3233938217163086,
203
+ "rewards/margins": 3.418271541595459,
204
+ "rewards/rejected": -4.741665840148926,
205
+ "step": 110
206
  },
207
  {
208
  "epoch": 0.13,
209
+ "grad_norm": 47.80125591905201,
210
+ "learning_rate": 4.990375746213598e-07,
211
+ "logits/chosen": -1.4404656887054443,
212
+ "logits/rejected": -1.311632513999939,
213
+ "logps/chosen": -286.67169189453125,
214
+ "logps/rejected": -644.265625,
215
+ "loss": 0.2299,
216
+ "rewards/accuracies": 0.893750011920929,
217
+ "rewards/chosen": -1.5356190204620361,
218
+ "rewards/margins": 4.171111583709717,
219
+ "rewards/rejected": -5.706730842590332,
220
+ "step": 120
221
+ },
222
+ {
223
+ "epoch": 0.14,
224
+ "grad_norm": 103.21873941787233,
225
+ "learning_rate": 4.980697142834314e-07,
226
+ "logits/chosen": -1.4422317743301392,
227
+ "logits/rejected": -1.295238733291626,
228
+ "logps/chosen": -258.7148742675781,
229
+ "logps/rejected": -593.8055419921875,
230
+ "loss": 0.2288,
231
+ "rewards/accuracies": 0.875,
232
+ "rewards/chosen": -1.2843689918518066,
233
+ "rewards/margins": 3.8400402069091797,
234
+ "rewards/rejected": -5.1244096755981445,
235
+ "step": 130
236
  },
237
  {
238
  "epoch": 0.15,
239
+ "grad_norm": 40.83439235144051,
240
+ "learning_rate": 4.967700826904229e-07,
241
+ "logits/chosen": -1.4604966640472412,
242
+ "logits/rejected": -1.289422631263733,
243
+ "logps/chosen": -291.0477600097656,
244
+ "logps/rejected": -753.3111572265625,
245
+ "loss": 0.2219,
246
+ "rewards/accuracies": 0.8687499761581421,
247
+ "rewards/chosen": -1.6053016185760498,
248
+ "rewards/margins": 5.150030612945557,
249
+ "rewards/rejected": -6.755332946777344,
250
+ "step": 140
251
+ },
252
+ {
253
+ "epoch": 0.16,
254
+ "grad_norm": 35.47272090007365,
255
+ "learning_rate": 4.951404179843962e-07,
256
+ "logits/chosen": -1.439012885093689,
257
+ "logits/rejected": -1.197693109512329,
258
+ "logps/chosen": -317.4605407714844,
259
+ "logps/rejected": -829.1416015625,
260
+ "loss": 0.1987,
261
+ "rewards/accuracies": 0.887499988079071,
262
+ "rewards/chosen": -1.6784883737564087,
263
+ "rewards/margins": 5.721285820007324,
264
+ "rewards/rejected": -7.399774074554443,
265
+ "step": 150
266
  },
267
  {
268
  "epoch": 0.17,
269
+ "grad_norm": 41.95405531315011,
270
+ "learning_rate": 4.931828996974498e-07,
271
+ "logits/chosen": -1.3928442001342773,
272
+ "logits/rejected": -1.1220872402191162,
273
+ "logps/chosen": -289.9758605957031,
274
+ "logps/rejected": -711.052001953125,
275
+ "loss": 0.21,
276
+ "rewards/accuracies": 0.875,
277
+ "rewards/chosen": -1.6032127141952515,
278
+ "rewards/margins": 4.786148548126221,
279
+ "rewards/rejected": -6.389361381530762,
280
+ "step": 160
281
+ },
282
+ {
283
+ "epoch": 0.18,
284
+ "grad_norm": 44.30975250132851,
285
+ "learning_rate": 4.909001458367866e-07,
286
+ "logits/chosen": -1.4432841539382935,
287
+ "logits/rejected": -1.237818956375122,
288
+ "logps/chosen": -394.95751953125,
289
+ "logps/rejected": -859.7473754882812,
290
+ "loss": 0.1885,
291
+ "rewards/accuracies": 0.856249988079071,
292
+ "rewards/chosen": -2.7817580699920654,
293
+ "rewards/margins": 5.132468223571777,
294
+ "rewards/rejected": -7.914226531982422,
295
+ "step": 170
296
  },
297
  {
298
  "epoch": 0.19,
299
+ "grad_norm": 42.31623927536496,
300
+ "learning_rate": 4.882952093833627e-07,
301
+ "logits/chosen": -1.450234055519104,
302
+ "logits/rejected": -1.2738139629364014,
303
+ "logps/chosen": -342.06048583984375,
304
+ "logps/rejected": -785.0755004882812,
305
+ "loss": 0.2045,
306
+ "rewards/accuracies": 0.8687499761581421,
307
+ "rewards/chosen": -2.051614999771118,
308
+ "rewards/margins": 4.953377723693848,
309
+ "rewards/rejected": -7.0049920082092285,
310
+ "step": 180
311
+ },
312
+ {
313
+ "epoch": 0.2,
314
+ "grad_norm": 39.855754537956905,
315
+ "learning_rate": 4.853715742087946e-07,
316
+ "logits/chosen": -1.3248379230499268,
317
+ "logits/rejected": -1.09066903591156,
318
+ "logps/chosen": -368.9568786621094,
319
+ "logps/rejected": -883.1036987304688,
320
+ "loss": 0.1906,
321
+ "rewards/accuracies": 0.893750011920929,
322
+ "rewards/chosen": -2.428023099899292,
323
+ "rewards/margins": 5.649343967437744,
324
+ "rewards/rejected": -8.077366828918457,
325
+ "step": 190
326
  },
327
  {
328
  "epoch": 0.21,
329
+ "grad_norm": 43.44905869134368,
330
+ "learning_rate": 4.821331504159906e-07,
331
+ "logits/chosen": -1.122195839881897,
332
+ "logits/rejected": -0.9366437196731567,
333
+ "logps/chosen": -330.4066467285156,
334
+ "logps/rejected": -748.525634765625,
335
+ "loss": 0.1854,
336
+ "rewards/accuracies": 0.856249988079071,
337
+ "rewards/chosen": -2.157914161682129,
338
+ "rewards/margins": 4.577654838562012,
339
+ "rewards/rejected": -6.735569000244141,
340
+ "step": 200
341
  },
342
  {
343
  "epoch": 0.21,
344
+ "eval_logits/chosen": -1.1852705478668213,
345
+ "eval_logits/rejected": -1.108778953552246,
346
+ "eval_logps/chosen": -926.8517456054688,
347
+ "eval_logps/rejected": -1050.1199951171875,
348
+ "eval_loss": 1.6753935813903809,
349
+ "eval_rewards/accuracies": 0.62109375,
350
+ "eval_rewards/chosen": -6.15075159072876,
351
+ "eval_rewards/margins": 1.1751970052719116,
352
+ "eval_rewards/rejected": -7.325948715209961,
353
+ "eval_runtime": 97.5501,
354
+ "eval_samples_per_second": 20.502,
355
+ "eval_steps_per_second": 0.328,
356
+ "step": 200
357
+ },
358
+ {
359
+ "epoch": 0.22,
360
+ "grad_norm": 36.690007199237776,
361
+ "learning_rate": 4.785842691097342e-07,
362
+ "logits/chosen": -1.129880666732788,
363
+ "logits/rejected": -0.8643050193786621,
364
+ "logps/chosen": -371.864013671875,
365
+ "logps/rejected": -938.4737548828125,
366
+ "loss": 0.174,
367
+ "rewards/accuracies": 0.875,
368
+ "rewards/chosen": -2.416229009628296,
369
+ "rewards/margins": 6.226431369781494,
370
+ "rewards/rejected": -8.642660140991211,
371
+ "step": 210
372
  },
373
  {
374
  "epoch": 0.23,
375
+ "grad_norm": 50.37285904600086,
376
+ "learning_rate": 4.7472967660421603e-07,
377
+ "logits/chosen": -1.2257055044174194,
378
+ "logits/rejected": -0.964257538318634,
379
+ "logps/chosen": -339.07196044921875,
380
+ "logps/rejected": -897.4797973632812,
381
+ "loss": 0.1776,
382
+ "rewards/accuracies": 0.8999999761581421,
383
+ "rewards/chosen": -2.065497875213623,
384
+ "rewards/margins": 6.163290023803711,
385
+ "rewards/rejected": -8.228787422180176,
386
+ "step": 220
387
+ },
388
+ {
389
+ "epoch": 0.24,
390
+ "grad_norm": 41.9657054722659,
391
+ "learning_rate": 4.705745280752585e-07,
392
+ "logits/chosen": -1.2483174800872803,
393
+ "logits/rejected": -0.9970757365226746,
394
+ "logps/chosen": -328.69775390625,
395
+ "logps/rejected": -740.644775390625,
396
+ "loss": 0.1898,
397
+ "rewards/accuracies": 0.856249988079071,
398
+ "rewards/chosen": -1.9448823928833008,
399
+ "rewards/margins": 4.71053409576416,
400
+ "rewards/rejected": -6.655416965484619,
401
+ "step": 230
402
  },
403
  {
404
  "epoch": 0.25,
405
+ "grad_norm": 44.41737576885688,
406
+ "learning_rate": 4.6612438066572555e-07,
407
+ "logits/chosen": -1.1642967462539673,
408
+ "logits/rejected": -0.9903414845466614,
409
+ "logps/chosen": -318.5704650878906,
410
+ "logps/rejected": -773.8700561523438,
411
+ "loss": 0.1919,
412
+ "rewards/accuracies": 0.875,
413
+ "rewards/chosen": -1.9761199951171875,
414
+ "rewards/margins": 5.03547477722168,
415
+ "rewards/rejected": -7.011595249176025,
416
+ "step": 240
417
+ },
418
+ {
419
+ "epoch": 0.26,
420
+ "grad_norm": 35.762754087270444,
421
+ "learning_rate": 4.6138518605333664e-07,
422
+ "logits/chosen": -1.106542706489563,
423
+ "logits/rejected": -0.8413636088371277,
424
+ "logps/chosen": -434.7865295410156,
425
+ "logps/rejected": -1112.9827880859375,
426
+ "loss": 0.1773,
427
+ "rewards/accuracies": 0.887499988079071,
428
+ "rewards/chosen": -2.952258825302124,
429
+ "rewards/margins": 7.3310394287109375,
430
+ "rewards/rejected": -10.283297538757324,
431
+ "step": 250
432
  },
433
  {
434
  "epoch": 0.27,
435
+ "grad_norm": 35.92606291569474,
436
+ "learning_rate": 4.5636328249082514e-07,
437
+ "logits/chosen": -0.869143009185791,
438
+ "logits/rejected": -0.5741620063781738,
439
+ "logps/chosen": -467.0962829589844,
440
+ "logps/rejected": -1112.403564453125,
441
+ "loss": 0.162,
442
+ "rewards/accuracies": 0.875,
443
+ "rewards/chosen": -3.3054141998291016,
444
+ "rewards/margins": 7.030813694000244,
445
+ "rewards/rejected": -10.336227416992188,
446
+ "step": 260
447
+ },
448
+ {
449
+ "epoch": 0.28,
450
+ "grad_norm": 53.50950070995327,
451
+ "learning_rate": 4.510653863290871e-07,
452
+ "logits/chosen": -0.9721959829330444,
453
+ "logits/rejected": -0.6126078367233276,
454
+ "logps/chosen": -439.41436767578125,
455
+ "logps/rejected": -1052.373291015625,
456
+ "loss": 0.1814,
457
+ "rewards/accuracies": 0.893750011920929,
458
+ "rewards/chosen": -2.9460256099700928,
459
+ "rewards/margins": 6.7303972244262695,
460
+ "rewards/rejected": -9.676422119140625,
461
+ "step": 270
462
  },
463
  {
464
  "epoch": 0.29,
465
+ "grad_norm": 39.535885045091504,
466
+ "learning_rate": 4.4549858303465737e-07,
467
+ "logits/chosen": -1.0113680362701416,
468
+ "logits/rejected": -0.6861897706985474,
469
+ "logps/chosen": -475.51251220703125,
470
+ "logps/rejected": -1126.9036865234375,
471
+ "loss": 0.1758,
472
+ "rewards/accuracies": 0.8812500238418579,
473
+ "rewards/chosen": -3.39562726020813,
474
+ "rewards/margins": 7.080619812011719,
475
+ "rewards/rejected": -10.47624683380127,
476
+ "step": 280
477
+ },
478
+ {
479
+ "epoch": 0.3,
480
+ "grad_norm": 27.716897646836834,
481
+ "learning_rate": 4.396703177135261e-07,
482
+ "logits/chosen": -0.9551790952682495,
483
+ "logits/rejected": -0.6880333423614502,
484
+ "logps/chosen": -410.09857177734375,
485
+ "logps/rejected": -904.8678588867188,
486
+ "loss": 0.1956,
487
+ "rewards/accuracies": 0.875,
488
+ "rewards/chosen": -2.7216389179229736,
489
+ "rewards/margins": 5.489422798156738,
490
+ "rewards/rejected": -8.21106243133545,
491
+ "step": 290
492
  },
493
  {
494
  "epoch": 0.31,
495
+ "grad_norm": 39.15676652312449,
496
+ "learning_rate": 4.335883851539693e-07,
497
+ "logits/chosen": -1.0925099849700928,
498
+ "logits/rejected": -0.763215184211731,
499
+ "logps/chosen": -293.86553955078125,
500
+ "logps/rejected": -759.0989990234375,
501
+ "loss": 0.1799,
502
+ "rewards/accuracies": 0.893750011920929,
503
+ "rewards/chosen": -1.6122547388076782,
504
+ "rewards/margins": 5.217433452606201,
505
+ "rewards/rejected": -6.82968807220459,
506
+ "step": 300
507
+ },
508
+ {
509
+ "epoch": 0.31,
510
+ "eval_logits/chosen": -1.1109943389892578,
511
+ "eval_logits/rejected": -1.0192608833312988,
512
+ "eval_logps/chosen": -903.3418579101562,
513
+ "eval_logps/rejected": -1015.4615478515625,
514
+ "eval_loss": 1.5589935779571533,
515
+ "eval_rewards/accuracies": 0.59765625,
516
+ "eval_rewards/chosen": -5.915654182434082,
517
+ "eval_rewards/margins": 1.0637093782424927,
518
+ "eval_rewards/rejected": -6.979363441467285,
519
+ "eval_runtime": 97.4807,
520
+ "eval_samples_per_second": 20.517,
521
+ "eval_steps_per_second": 0.328,
522
+ "step": 300
523
+ },
524
+ {
525
+ "epoch": 0.32,
526
+ "grad_norm": 31.761072657280515,
527
+ "learning_rate": 4.272609194017105e-07,
528
+ "logits/chosen": -1.161056637763977,
529
+ "logits/rejected": -0.8523654937744141,
530
+ "logps/chosen": -387.35833740234375,
531
+ "logps/rejected": -977.5528564453125,
532
+ "loss": 0.1806,
533
+ "rewards/accuracies": 0.8687499761581421,
534
+ "rewards/chosen": -2.436070442199707,
535
+ "rewards/margins": 6.559242248535156,
536
+ "rewards/rejected": -8.99531364440918,
537
+ "step": 310
538
  },
539
  {
540
  "epoch": 0.33,
541
+ "grad_norm": 36.03487432587838,
542
+ "learning_rate": 4.2069638288135547e-07,
543
+ "logits/chosen": -0.8736389875411987,
544
+ "logits/rejected": -0.6802955865859985,
545
+ "logps/chosen": -391.59564208984375,
546
+ "logps/rejected": -1047.556884765625,
547
+ "loss": 0.1698,
548
+ "rewards/accuracies": 0.9125000238418579,
549
+ "rewards/chosen": -2.664311170578003,
550
+ "rewards/margins": 7.013079643249512,
551
+ "rewards/rejected": -9.677392959594727,
552
+ "step": 320
553
+ },
554
+ {
555
+ "epoch": 0.35,
556
+ "grad_norm": 31.572371493306342,
557
+ "learning_rate": 4.139035550786494e-07,
558
+ "logits/chosen": -1.1484278440475464,
559
+ "logits/rejected": -0.7356959581375122,
560
+ "logps/chosen": -385.1824645996094,
561
+ "logps/rejected": -1041.6712646484375,
562
+ "loss": 0.1467,
563
+ "rewards/accuracies": 0.925000011920929,
564
+ "rewards/chosen": -2.5319342613220215,
565
+ "rewards/margins": 7.115927219390869,
566
+ "rewards/rejected": -9.647860527038574,
567
+ "step": 330
568
  },
569
  {
570
  "epoch": 0.36,
571
+ "grad_norm": 30.493158942186962,
572
+ "learning_rate": 4.0689152079869306e-07,
573
+ "logits/chosen": -1.111301064491272,
574
+ "logits/rejected": -0.7459093332290649,
575
+ "logps/chosen": -419.9774475097656,
576
+ "logps/rejected": -1145.664306640625,
577
+ "loss": 0.1589,
578
+ "rewards/accuracies": 0.9125000238418579,
579
+ "rewards/chosen": -2.8832128047943115,
580
+ "rewards/margins": 7.796321868896484,
581
+ "rewards/rejected": -10.679533958435059,
582
+ "step": 340
583
+ },
584
+ {
585
+ "epoch": 0.37,
586
+ "grad_norm": 35.73091881437617,
587
+ "learning_rate": 3.99669658015821e-07,
588
+ "logits/chosen": -0.9885336756706238,
589
+ "logits/rejected": -0.6827311515808105,
590
+ "logps/chosen": -417.18621826171875,
591
+ "logps/rejected": -1244.3375244140625,
592
+ "loss": 0.1607,
593
+ "rewards/accuracies": 0.925000011920929,
594
+ "rewards/chosen": -2.9024839401245117,
595
+ "rewards/margins": 8.765107154846191,
596
+ "rewards/rejected": -11.667591094970703,
597
+ "step": 350
598
  },
599
  {
600
  "epoch": 0.38,
601
+ "grad_norm": 26.448274353945774,
602
+ "learning_rate": 3.92247625331392e-07,
603
+ "logits/chosen": -0.9021160006523132,
604
+ "logits/rejected": -0.46636825799942017,
605
+ "logps/chosen": -396.3219299316406,
606
+ "logps/rejected": -1072.203857421875,
607
+ "loss": 0.168,
608
+ "rewards/accuracies": 0.918749988079071,
609
+ "rewards/chosen": -2.622156858444214,
610
+ "rewards/margins": 7.3311333656311035,
611
+ "rewards/rejected": -9.953289985656738,
612
+ "step": 360
613
+ },
614
+ {
615
+ "epoch": 0.39,
616
+ "grad_norm": 26.57225676522975,
617
+ "learning_rate": 3.846353490562664e-07,
618
+ "logits/chosen": -0.7423251867294312,
619
+ "logits/rejected": -0.3315241038799286,
620
+ "logps/chosen": -403.2605285644531,
621
+ "logps/rejected": -1123.38623046875,
622
+ "loss": 0.1524,
623
+ "rewards/accuracies": 0.893750011920929,
624
+ "rewards/chosen": -2.6948583126068115,
625
+ "rewards/margins": 7.7378740310668945,
626
+ "rewards/rejected": -10.432731628417969,
627
+ "step": 370
628
  },
629
  {
630
  "epoch": 0.4,
631
+ "grad_norm": 31.253155533057107,
632
+ "learning_rate": 3.768430099352445e-07,
633
+ "logits/chosen": -0.6284547448158264,
634
+ "logits/rejected": -0.2557411789894104,
635
+ "logps/chosen": -438.34454345703125,
636
+ "logps/rejected": -1187.157958984375,
637
+ "loss": 0.161,
638
+ "rewards/accuracies": 0.893750011920929,
639
+ "rewards/chosen": -3.113786220550537,
640
+ "rewards/margins": 7.986392974853516,
641
+ "rewards/rejected": -11.100178718566895,
642
+ "step": 380
643
+ },
644
+ {
645
+ "epoch": 0.41,
646
+ "grad_norm": 30.747112252887895,
647
+ "learning_rate": 3.6888102953122304e-07,
648
+ "logits/chosen": -0.888546347618103,
649
+ "logits/rejected": -0.5933178663253784,
650
+ "logps/chosen": -410.141357421875,
651
+ "logps/rejected": -1144.3177490234375,
652
+ "loss": 0.1578,
653
+ "rewards/accuracies": 0.8687499761581421,
654
+ "rewards/chosen": -2.877619981765747,
655
+ "rewards/margins": 7.820855617523193,
656
+ "rewards/rejected": -10.69847583770752,
657
+ "step": 390
658
  },
659
  {
660
  "epoch": 0.42,
661
+ "grad_norm": 46.52274946683974,
662
+ "learning_rate": 3.607600562872785e-07,
663
+ "logits/chosen": -0.6739174127578735,
664
+ "logits/rejected": -0.4188503324985504,
665
+ "logps/chosen": -447.11083984375,
666
+ "logps/rejected": -1131.9605712890625,
667
+ "loss": 0.1679,
668
+ "rewards/accuracies": 0.887499988079071,
669
+ "rewards/chosen": -3.236891508102417,
670
+ "rewards/margins": 7.345515251159668,
671
+ "rewards/rejected": -10.582406997680664,
672
+ "step": 400
673
  },
674
  {
675
  "epoch": 0.42,
676
+ "eval_logits/chosen": -0.7096253037452698,
677
+ "eval_logits/rejected": -0.5753335356712341,
678
+ "eval_logps/chosen": -1096.810791015625,
679
+ "eval_logps/rejected": -1238.125244140625,
680
+ "eval_loss": 2.102952241897583,
681
+ "eval_rewards/accuracies": 0.609375,
682
+ "eval_rewards/chosen": -7.850342750549316,
683
+ "eval_rewards/margins": 1.355657696723938,
684
+ "eval_rewards/rejected": -9.206000328063965,
685
+ "eval_runtime": 97.4138,
686
+ "eval_samples_per_second": 20.531,
687
+ "eval_steps_per_second": 0.328,
688
+ "step": 400
689
+ },
690
+ {
691
+ "epoch": 0.43,
692
+ "grad_norm": 39.27703884155879,
693
+ "learning_rate": 3.5249095128531856e-07,
694
+ "logits/chosen": -0.6762361526489258,
695
+ "logits/rejected": -0.2780495882034302,
696
+ "logps/chosen": -492.8828125,
697
+ "logps/rejected": -1225.805419921875,
698
+ "loss": 0.1705,
699
+ "rewards/accuracies": 0.856249988079071,
700
+ "rewards/chosen": -3.5533318519592285,
701
+ "rewards/margins": 7.891423225402832,
702
+ "rewards/rejected": -11.444755554199219,
703
+ "step": 410
704
  },
705
  {
706
  "epoch": 0.44,
707
+ "grad_norm": 29.296123751306272,
708
+ "learning_rate": 3.4408477372034736e-07,
709
+ "logits/chosen": -0.6263514757156372,
710
+ "logits/rejected": -0.36908912658691406,
711
+ "logps/chosen": -436.956298828125,
712
+ "logps/rejected": -1203.846923828125,
713
+ "loss": 0.1542,
714
+ "rewards/accuracies": 0.90625,
715
+ "rewards/chosen": -3.0062294006347656,
716
+ "rewards/margins": 8.188032150268555,
717
+ "rewards/rejected": -11.19426155090332,
718
+ "step": 420
719
+ },
720
+ {
721
+ "epoch": 0.45,
722
+ "grad_norm": 38.53939686623868,
723
+ "learning_rate": 3.3555276610977276e-07,
724
+ "logits/chosen": -0.7492274045944214,
725
+ "logits/rejected": -0.3440536856651306,
726
+ "logps/chosen": -440.6358337402344,
727
+ "logps/rejected": -1202.001953125,
728
+ "loss": 0.1624,
729
+ "rewards/accuracies": 0.9375,
730
+ "rewards/chosen": -3.0986926555633545,
731
+ "rewards/margins": 8.180191040039062,
732
+ "rewards/rejected": -11.278883934020996,
733
+ "step": 430
734
  },
735
  {
736
  "epoch": 0.46,
737
+ "grad_norm": 44.81056957165417,
738
+ "learning_rate": 3.269063392575352e-07,
739
+ "logits/chosen": -0.7927631139755249,
740
+ "logits/rejected": -0.4414951801300049,
741
+ "logps/chosen": -404.3497009277344,
742
+ "logps/rejected": -1111.9443359375,
743
+ "loss": 0.1883,
744
+ "rewards/accuracies": 0.862500011920929,
745
+ "rewards/chosen": -2.7665016651153564,
746
+ "rewards/margins": 7.598056793212891,
747
+ "rewards/rejected": -10.364558219909668,
748
+ "step": 440
749
+ },
750
+ {
751
+ "epoch": 0.47,
752
+ "grad_norm": 46.720798060289724,
753
+ "learning_rate": 3.1815705699316964e-07,
754
+ "logits/chosen": -0.7731117010116577,
755
+ "logits/rejected": -0.4938638210296631,
756
+ "logps/chosen": -400.8499755859375,
757
+ "logps/rejected": -1148.82177734375,
758
+ "loss": 0.1656,
759
+ "rewards/accuracies": 0.84375,
760
+ "rewards/chosen": -2.6965749263763428,
761
+ "rewards/margins": 8.003564834594727,
762
+ "rewards/rejected": -10.700139045715332,
763
+ "step": 450
764
  },
765
  {
766
  "epoch": 0.48,
767
+ "grad_norm": 35.01981360851454,
768
+ "learning_rate": 3.0931662070620794e-07,
769
+ "logits/chosen": -0.787259578704834,
770
+ "logits/rejected": -0.5048869848251343,
771
+ "logps/chosen": -389.9725036621094,
772
+ "logps/rejected": -1115.496337890625,
773
+ "loss": 0.1644,
774
+ "rewards/accuracies": 0.893750011920929,
775
+ "rewards/chosen": -2.5107271671295166,
776
+ "rewards/margins": 7.763253688812256,
777
+ "rewards/rejected": -10.273981094360352,
778
+ "step": 460
779
+ },
780
+ {
781
+ "epoch": 0.49,
782
+ "grad_norm": 32.88620144282501,
783
+ "learning_rate": 3.003968536966078e-07,
784
+ "logits/chosen": -0.8425847887992859,
785
+ "logits/rejected": -0.507870614528656,
786
+ "logps/chosen": -377.3370666503906,
787
+ "logps/rejected": -1063.682861328125,
788
+ "loss": 0.1581,
789
+ "rewards/accuracies": 0.8812500238418579,
790
+ "rewards/chosen": -2.452817440032959,
791
+ "rewards/margins": 7.454998970031738,
792
+ "rewards/rejected": -9.907815933227539,
793
+ "step": 470
794
  },
795
  {
796
  "epoch": 0.5,
797
+ "grad_norm": 55.426706045689585,
798
+ "learning_rate": 2.9140968536213693e-07,
799
+ "logits/chosen": -0.915787398815155,
800
+ "logits/rejected": -0.6263198852539062,
801
+ "logps/chosen": -401.2126770019531,
802
+ "logps/rejected": -1090.992431640625,
803
+ "loss": 0.176,
804
+ "rewards/accuracies": 0.893750011920929,
805
+ "rewards/chosen": -2.7542808055877686,
806
+ "rewards/margins": 7.400801181793213,
807
+ "rewards/rejected": -10.155081748962402,
808
+ "step": 480
809
+ },
810
+ {
811
+ "epoch": 0.51,
812
+ "grad_norm": 22.799610213421833,
813
+ "learning_rate": 2.823671352438608e-07,
814
+ "logits/chosen": -0.8439705967903137,
815
+ "logits/rejected": -0.6034272313117981,
816
+ "logps/chosen": -354.22772216796875,
817
+ "logps/rejected": -956.9412841796875,
818
+ "loss": 0.1624,
819
+ "rewards/accuracies": 0.90625,
820
+ "rewards/chosen": -2.377206802368164,
821
+ "rewards/margins": 6.480790615081787,
822
+ "rewards/rejected": -8.85799789428711,
823
+ "step": 490
824
  },
825
  {
826
  "epoch": 0.52,
827
+ "grad_norm": 39.88720282221281,
828
+ "learning_rate": 2.73281296951072e-07,
829
+ "logits/chosen": -1.0180326700210571,
830
+ "logits/rejected": -0.7310226559638977,
831
+ "logps/chosen": -385.81866455078125,
832
+ "logps/rejected": -1014.95458984375,
833
+ "loss": 0.1693,
834
+ "rewards/accuracies": 0.925000011920929,
835
+ "rewards/chosen": -2.5694196224212646,
836
+ "rewards/margins": 6.7894439697265625,
837
+ "rewards/rejected": -9.358863830566406,
838
+ "step": 500
839
+ },
840
+ {
841
+ "epoch": 0.52,
842
+ "eval_logits/chosen": -0.987293541431427,
843
+ "eval_logits/rejected": -0.8597699403762817,
844
+ "eval_logps/chosen": -945.861083984375,
845
+ "eval_logps/rejected": -1084.707763671875,
846
+ "eval_loss": 1.6562931537628174,
847
+ "eval_rewards/accuracies": 0.625,
848
+ "eval_rewards/chosen": -6.340845108032227,
849
+ "eval_rewards/margins": 1.3309805393218994,
850
+ "eval_rewards/rejected": -7.671825885772705,
851
+ "eval_runtime": 97.6234,
852
+ "eval_samples_per_second": 20.487,
853
+ "eval_steps_per_second": 0.328,
854
+ "step": 500
855
+ },
856
+ {
857
+ "epoch": 0.53,
858
+ "grad_norm": 36.2723513365496,
859
+ "learning_rate": 2.641643219871597e-07,
860
+ "logits/chosen": -1.0296061038970947,
861
+ "logits/rejected": -0.6009940505027771,
862
+ "logps/chosen": -367.12713623046875,
863
+ "logps/rejected": -957.3023681640625,
864
+ "loss": 0.1488,
865
+ "rewards/accuracies": 0.918749988079071,
866
+ "rewards/chosen": -2.196636199951172,
867
+ "rewards/margins": 6.570919990539551,
868
+ "rewards/rejected": -8.767557144165039,
869
+ "step": 510
870
  },
871
  {
872
  "epoch": 0.54,
873
+ "grad_norm": 41.00764971947524,
874
+ "learning_rate": 2.550284034980507e-07,
875
+ "logits/chosen": -0.9317744970321655,
876
+ "logits/rejected": -0.4665905833244324,
877
+ "logps/chosen": -389.6575012207031,
878
+ "logps/rejected": -1017.7044067382812,
879
+ "loss": 0.1731,
880
+ "rewards/accuracies": 0.875,
881
+ "rewards/chosen": -2.4715576171875,
882
+ "rewards/margins": 6.838615417480469,
883
+ "rewards/rejected": -9.310173034667969,
884
+ "step": 520
885
  },
886
  {
887
+ "epoch": 0.55,
888
+ "grad_norm": 24.760088451093488,
889
+ "learning_rate": 2.4588575996495794e-07,
890
+ "logits/chosen": -1.0027564764022827,
891
+ "logits/rejected": -0.5636709928512573,
892
+ "logps/chosen": -361.3106384277344,
893
+ "logps/rejected": -1032.2296142578125,
894
+ "loss": 0.1409,
895
+ "rewards/accuracies": 0.925000011920929,
896
+ "rewards/chosen": -2.317450761795044,
897
+ "rewards/margins": 7.240323066711426,
898
+ "rewards/rejected": -9.557774543762207,
899
+ "step": 530
900
+ },
901
+ {
902
+ "epoch": 0.57,
903
+ "grad_norm": 29.45673267405302,
904
+ "learning_rate": 2.367486188632446e-07,
905
+ "logits/chosen": -0.6549164652824402,
906
+ "logits/rejected": -0.2623385787010193,
907
+ "logps/chosen": -400.9425048828125,
908
+ "logps/rejected": -1065.4869384765625,
909
+ "loss": 0.1447,
910
+ "rewards/accuracies": 0.9125000238418579,
911
+ "rewards/chosen": -2.686591863632202,
912
+ "rewards/margins": 7.183202266693115,
913
+ "rewards/rejected": -9.869793891906738,
914
+ "step": 540
915
+ },
916
+ {
917
+ "epoch": 0.58,
918
+ "grad_norm": 28.112202270911702,
919
+ "learning_rate": 2.276292003092593e-07,
920
+ "logits/chosen": -0.7201762199401855,
921
+ "logits/rejected": -0.20686273276805878,
922
+ "logps/chosen": -414.7852478027344,
923
+ "logps/rejected": -1124.4974365234375,
924
+ "loss": 0.154,
925
+ "rewards/accuracies": 0.90625,
926
+ "rewards/chosen": -2.9022722244262695,
927
+ "rewards/margins": 7.623268127441406,
928
+ "rewards/rejected": -10.525540351867676,
929
+ "step": 550
930
  },
931
  {
932
  "epoch": 0.59,
933
+ "grad_norm": 39.46687358645063,
934
+ "learning_rate": 2.185397007170141e-07,
935
+ "logits/chosen": -0.40733662247657776,
936
+ "logits/rejected": 0.02392803505063057,
937
+ "logps/chosen": -366.5719299316406,
938
+ "logps/rejected": -931.62841796875,
939
+ "loss": 0.1679,
940
+ "rewards/accuracies": 0.856249988079071,
941
+ "rewards/chosen": -2.599259853363037,
942
+ "rewards/margins": 6.089108467102051,
943
+ "rewards/rejected": -8.68836784362793,
944
+ "step": 560
945
+ },
946
+ {
947
+ "epoch": 0.6,
948
+ "grad_norm": 45.19451118355015,
949
+ "learning_rate": 2.094922764865619e-07,
950
+ "logits/chosen": -0.6102726459503174,
951
+ "logits/rejected": -0.25020402669906616,
952
+ "logps/chosen": -418.000732421875,
953
+ "logps/rejected": -1118.455322265625,
954
+ "loss": 0.1527,
955
+ "rewards/accuracies": 0.8812500238418579,
956
+ "rewards/chosen": -2.813910722732544,
957
+ "rewards/margins": 7.487911224365234,
958
+ "rewards/rejected": -10.3018217086792,
959
+ "step": 570
960
  },
961
  {
962
  "epoch": 0.61,
963
+ "grad_norm": 27.296281473923543,
964
+ "learning_rate": 2.0049902774588797e-07,
965
+ "logits/chosen": -0.5131772756576538,
966
+ "logits/rejected": -0.07118790596723557,
967
+ "logps/chosen": -358.7304992675781,
968
+ "logps/rejected": -981.1790161132812,
969
+ "loss": 0.1379,
970
+ "rewards/accuracies": 0.9125000238418579,
971
+ "rewards/chosen": -2.248889923095703,
972
+ "rewards/margins": 6.8233489990234375,
973
+ "rewards/rejected": -9.072239875793457,
974
+ "step": 580
975
+ },
976
+ {
977
+ "epoch": 0.62,
978
+ "grad_norm": 28.400791802256467,
979
+ "learning_rate": 1.9157198216806238e-07,
980
+ "logits/chosen": -0.5218924283981323,
981
+ "logits/rejected": -0.11522813141345978,
982
+ "logps/chosen": -385.2206115722656,
983
+ "logps/rejected": -1059.663330078125,
984
+ "loss": 0.1526,
985
+ "rewards/accuracies": 0.90625,
986
+ "rewards/chosen": -2.5596184730529785,
987
+ "rewards/margins": 7.243247032165527,
988
+ "rewards/rejected": -9.802865028381348,
989
+ "step": 590
990
  },
991
  {
992
  "epoch": 0.63,
993
+ "grad_norm": 36.01204411609108,
994
+ "learning_rate": 1.8272307888529274e-07,
995
+ "logits/chosen": -0.6575510501861572,
996
+ "logits/rejected": -0.13078925013542175,
997
+ "logps/chosen": -393.07891845703125,
998
+ "logps/rejected": -1083.641357421875,
999
+ "loss": 0.1609,
1000
+ "rewards/accuracies": 0.9125000238418579,
1001
+ "rewards/chosen": -2.5931639671325684,
1002
+ "rewards/margins": 7.458055019378662,
1003
+ "rewards/rejected": -10.05121898651123,
1004
+ "step": 600
1005
  },
1006
  {
1007
  "epoch": 0.63,
1008
+ "eval_logits/chosen": -0.616359293460846,
1009
+ "eval_logits/rejected": -0.45154044032096863,
1010
+ "eval_logps/chosen": -959.7227172851562,
1011
+ "eval_logps/rejected": -1097.447998046875,
1012
+ "eval_loss": 1.6818441152572632,
1013
+ "eval_rewards/accuracies": 0.62109375,
1014
+ "eval_rewards/chosen": -6.479461669921875,
1015
+ "eval_rewards/margins": 1.3197669982910156,
1016
+ "eval_rewards/rejected": -7.799228668212891,
1017
+ "eval_runtime": 97.4083,
1018
+ "eval_samples_per_second": 20.532,
1019
+ "eval_steps_per_second": 0.329,
1020
+ "step": 600
1021
+ },
1022
+ {
1023
+ "epoch": 0.64,
1024
+ "grad_norm": 25.93993016029263,
1025
+ "learning_rate": 1.7396415252139288e-07,
1026
+ "logits/chosen": -0.9318272471427917,
1027
+ "logits/rejected": -0.5557786226272583,
1028
+ "logps/chosen": -372.4070129394531,
1029
+ "logps/rejected": -1039.4412841796875,
1030
+ "loss": 0.1523,
1031
+ "rewards/accuracies": 0.9125000238418579,
1032
+ "rewards/chosen": -2.3948473930358887,
1033
+ "rewards/margins": 7.164391994476318,
1034
+ "rewards/rejected": -9.559239387512207,
1035
+ "step": 610
1036
  },
1037
  {
1038
  "epoch": 0.65,
1039
+ "grad_norm": 43.56570182026958,
1040
+ "learning_rate": 1.6530691736402316e-07,
1041
+ "logits/chosen": -0.6166467070579529,
1042
+ "logits/rejected": -0.15916576981544495,
1043
+ "logps/chosen": -382.71112060546875,
1044
+ "logps/rejected": -1044.164794921875,
1045
+ "loss": 0.1514,
1046
+ "rewards/accuracies": 0.887499988079071,
1047
+ "rewards/chosen": -2.561720848083496,
1048
+ "rewards/margins": 7.1611833572387695,
1049
+ "rewards/rejected": -9.722905158996582,
1050
+ "step": 620
1051
+ },
1052
+ {
1053
+ "epoch": 0.66,
1054
+ "grad_norm": 28.928080380488158,
1055
+ "learning_rate": 1.5676295169786864e-07,
1056
+ "logits/chosen": -0.5803557634353638,
1057
+ "logits/rejected": -0.2489413022994995,
1058
+ "logps/chosen": -383.3107604980469,
1059
+ "logps/rejected": -997.8870239257812,
1060
+ "loss": 0.164,
1061
+ "rewards/accuracies": 0.887499988079071,
1062
+ "rewards/chosen": -2.578981876373291,
1063
+ "rewards/margins": 6.61221170425415,
1064
+ "rewards/rejected": -9.191193580627441,
1065
+ "step": 630
1066
  },
1067
  {
1068
  "epoch": 0.67,
1069
+ "grad_norm": 20.471962287027644,
1070
+ "learning_rate": 1.483436823197092e-07,
1071
+ "logits/chosen": -0.6895365118980408,
1072
+ "logits/rejected": -0.1990533173084259,
1073
+ "logps/chosen": -362.12237548828125,
1074
+ "logps/rejected": -1061.60400390625,
1075
+ "loss": 0.1475,
1076
+ "rewards/accuracies": 0.956250011920929,
1077
+ "rewards/chosen": -2.2892990112304688,
1078
+ "rewards/margins": 7.556910514831543,
1079
+ "rewards/rejected": -9.846209526062012,
1080
+ "step": 640
1081
+ },
1082
+ {
1083
+ "epoch": 0.68,
1084
+ "grad_norm": 26.665412290672624,
1085
+ "learning_rate": 1.4006036925609243e-07,
1086
+ "logits/chosen": -0.6053385734558105,
1087
+ "logits/rejected": -0.19510070979595184,
1088
+ "logps/chosen": -381.46514892578125,
1089
+ "logps/rejected": -1129.877685546875,
1090
+ "loss": 0.1432,
1091
+ "rewards/accuracies": 0.90625,
1092
+ "rewards/chosen": -2.4206106662750244,
1093
+ "rewards/margins": 7.994618892669678,
1094
+ "rewards/rejected": -10.415228843688965,
1095
+ "step": 650
1096
  },
1097
  {
1098
  "epoch": 0.69,
1099
+ "grad_norm": 33.292289854748574,
1100
+ "learning_rate": 1.319240907040458e-07,
1101
+ "logits/chosen": -0.5973755717277527,
1102
+ "logits/rejected": -0.16952477395534515,
1103
+ "logps/chosen": -410.3275451660156,
1104
+ "logps/rejected": -1123.840087890625,
1105
+ "loss": 0.1388,
1106
+ "rewards/accuracies": 0.8999999761581421,
1107
+ "rewards/chosen": -2.748767852783203,
1108
+ "rewards/margins": 7.701995849609375,
1109
+ "rewards/rejected": -10.450764656066895,
1110
+ "step": 660
1111
+ },
1112
+ {
1113
+ "epoch": 0.7,
1114
+ "grad_norm": 30.896865825928337,
1115
+ "learning_rate": 1.239457282149695e-07,
1116
+ "logits/chosen": -0.5566490292549133,
1117
+ "logits/rejected": -0.23316040635108948,
1118
+ "logps/chosen": -428.1212463378906,
1119
+ "logps/rejected": -1241.2476806640625,
1120
+ "loss": 0.1475,
1121
+ "rewards/accuracies": 0.887499988079071,
1122
+ "rewards/chosen": -3.0948407649993896,
1123
+ "rewards/margins": 8.58587646484375,
1124
+ "rewards/rejected": -11.680717468261719,
1125
+ "step": 670
1126
  },
1127
  {
1128
  "epoch": 0.71,
1129
+ "grad_norm": 35.84504872343951,
1130
+ "learning_rate": 1.1613595214152711e-07,
1131
+ "logits/chosen": -0.5821831822395325,
1132
+ "logits/rejected": -0.07458965480327606,
1133
+ "logps/chosen": -399.4475402832031,
1134
+ "logps/rejected": -1166.2537841796875,
1135
+ "loss": 0.1518,
1136
+ "rewards/accuracies": 0.9312499761581421,
1137
+ "rewards/chosen": -2.7263970375061035,
1138
+ "rewards/margins": 8.200857162475586,
1139
+ "rewards/rejected": -10.927252769470215,
1140
+ "step": 680
1141
+ },
1142
+ {
1143
+ "epoch": 0.72,
1144
+ "grad_norm": 33.84684500335279,
1145
+ "learning_rate": 1.0850520736699362e-07,
1146
+ "logits/chosen": -0.5655028223991394,
1147
+ "logits/rejected": -0.15712787210941315,
1148
+ "logps/chosen": -377.57208251953125,
1149
+ "logps/rejected": -1100.81884765625,
1150
+ "loss": 0.1454,
1151
+ "rewards/accuracies": 0.90625,
1152
+ "rewards/chosen": -2.5599417686462402,
1153
+ "rewards/margins": 7.693644046783447,
1154
+ "rewards/rejected": -10.253584861755371,
1155
+ "step": 690
1156
  },
1157
  {
1158
  "epoch": 0.73,
1159
+ "grad_norm": 47.388927709457874,
1160
+ "learning_rate": 1.0106369933615042e-07,
1161
+ "logits/chosen": -0.2697560787200928,
1162
+ "logits/rejected": 0.21001645922660828,
1163
+ "logps/chosen": -389.8284606933594,
1164
+ "logps/rejected": -1081.4793701171875,
1165
+ "loss": 0.1559,
1166
+ "rewards/accuracies": 0.893750011920929,
1167
+ "rewards/chosen": -2.6655356884002686,
1168
+ "rewards/margins": 7.382843971252441,
1169
+ "rewards/rejected": -10.048379898071289,
1170
+ "step": 700
1171
+ },
1172
+ {
1173
+ "epoch": 0.73,
1174
+ "eval_logits/chosen": -0.5852116346359253,
1175
+ "eval_logits/rejected": -0.4166191518306732,
1176
+ "eval_logps/chosen": -1046.6217041015625,
1177
+ "eval_logps/rejected": -1197.0731201171875,
1178
+ "eval_loss": 1.9277653694152832,
1179
+ "eval_rewards/accuracies": 0.61328125,
1180
+ "eval_rewards/chosen": -7.348450660705566,
1181
+ "eval_rewards/margins": 1.447028398513794,
1182
+ "eval_rewards/rejected": -8.795478820800781,
1183
+ "eval_runtime": 97.4242,
1184
+ "eval_samples_per_second": 20.529,
1185
+ "eval_steps_per_second": 0.328,
1186
+ "step": 700
1187
+ },
1188
+ {
1189
+ "epoch": 0.74,
1190
+ "grad_norm": 21.54892454805239,
1191
+ "learning_rate": 9.382138040640714e-08,
1192
+ "logits/chosen": -0.6082885265350342,
1193
+ "logits/rejected": 0.03385084122419357,
1194
+ "logps/chosen": -389.99542236328125,
1195
+ "logps/rejected": -1186.9339599609375,
1196
+ "loss": 0.1358,
1197
+ "rewards/accuracies": 0.9312499761581421,
1198
+ "rewards/chosen": -2.5867698192596436,
1199
+ "rewards/margins": 8.532285690307617,
1200
+ "rewards/rejected": -11.11905574798584,
1201
+ "step": 710
1202
  },
1203
  {
1204
  "epoch": 0.75,
1205
+ "grad_norm": 45.770131366856944,
1206
+ "learning_rate": 8.678793653740632e-08,
1207
+ "logits/chosen": -0.45069313049316406,
1208
+ "logits/rejected": -0.01569805108010769,
1209
+ "logps/chosen": -418.41015625,
1210
+ "logps/rejected": -1100.5294189453125,
1211
+ "loss": 0.1518,
1212
+ "rewards/accuracies": 0.8812500238418579,
1213
+ "rewards/chosen": -2.9045753479003906,
1214
+ "rewards/margins": 7.348256587982178,
1215
+ "rewards/rejected": -10.252832412719727,
1216
+ "step": 720
1217
+ },
1218
+ {
1219
+ "epoch": 0.76,
1220
+ "grad_norm": 26.24794912329421,
1221
+ "learning_rate": 7.997277433690983e-08,
1222
+ "logits/chosen": -0.6921511292457581,
1223
+ "logits/rejected": -0.1756693720817566,
1224
+ "logps/chosen": -402.31463623046875,
1225
+ "logps/rejected": -1098.74951171875,
1226
+ "loss": 0.1545,
1227
+ "rewards/accuracies": 0.918749988079071,
1228
+ "rewards/chosen": -2.674762725830078,
1229
+ "rewards/margins": 7.446639060974121,
1230
+ "rewards/rejected": -10.1214017868042,
1231
+ "step": 730
1232
  },
1233
  {
1234
  "epoch": 0.77,
1235
+ "grad_norm": 42.21104117559166,
1236
+ "learning_rate": 7.338500848029602e-08,
1237
+ "logits/chosen": -0.5230437517166138,
1238
+ "logits/rejected": -0.08038869500160217,
1239
+ "logps/chosen": -374.2379150390625,
1240
+ "logps/rejected": -1173.2691650390625,
1241
+ "loss": 0.1524,
1242
+ "rewards/accuracies": 0.90625,
1243
+ "rewards/chosen": -2.3602004051208496,
1244
+ "rewards/margins": 8.614425659179688,
1245
+ "rewards/rejected": -10.974625587463379,
1246
+ "step": 740
1247
  },
1248
  {
1249
+ "epoch": 0.78,
1250
+ "grad_norm": 45.35711154178482,
1251
+ "learning_rate": 6.70334495204884e-08,
1252
+ "logits/chosen": -0.4201095998287201,
1253
+ "logits/rejected": 0.112357497215271,
1254
+ "logps/chosen": -399.91766357421875,
1255
+ "logps/rejected": -1149.41357421875,
1256
+ "loss": 0.1425,
1257
+ "rewards/accuracies": 0.918749988079071,
1258
+ "rewards/chosen": -2.750382900238037,
1259
+ "rewards/margins": 8.02344799041748,
1260
+ "rewards/rejected": -10.773832321166992,
1261
+ "step": 750
1262
+ },
1263
+ {
1264
+ "epoch": 0.8,
1265
+ "grad_norm": 26.89122562285564,
1266
+ "learning_rate": 6.092659210462231e-08,
1267
+ "logits/chosen": -0.6679720878601074,
1268
+ "logits/rejected": -0.1197366863489151,
1269
+ "logps/chosen": -417.52996826171875,
1270
+ "logps/rejected": -1188.0615234375,
1271
+ "loss": 0.1345,
1272
+ "rewards/accuracies": 0.8999999761581421,
1273
+ "rewards/chosen": -2.673654079437256,
1274
+ "rewards/margins": 8.336530685424805,
1275
+ "rewards/rejected": -11.010185241699219,
1276
+ "step": 760
1277
+ },
1278
+ {
1279
+ "epoch": 0.81,
1280
+ "grad_norm": 34.7731803926411,
1281
+ "learning_rate": 5.507260361320737e-08,
1282
+ "logits/chosen": -0.6739757657051086,
1283
+ "logits/rejected": -0.05404149740934372,
1284
+ "logps/chosen": -402.13238525390625,
1285
+ "logps/rejected": -1078.5826416015625,
1286
+ "loss": 0.1516,
1287
+ "rewards/accuracies": 0.893750011920929,
1288
+ "rewards/chosen": -2.7083559036254883,
1289
+ "rewards/margins": 7.324588775634766,
1290
+ "rewards/rejected": -10.032943725585938,
1291
+ "step": 770
1292
  },
1293
  {
1294
  "epoch": 0.82,
1295
+ "grad_norm": 25.35847618120489,
1296
+ "learning_rate": 4.947931323697982e-08,
1297
+ "logits/chosen": -0.5962761044502258,
1298
+ "logits/rejected": -0.05018671602010727,
1299
+ "logps/chosen": -392.66363525390625,
1300
+ "logps/rejected": -1066.8021240234375,
1301
+ "loss": 0.1551,
1302
+ "rewards/accuracies": 0.9125000238418579,
1303
+ "rewards/chosen": -2.6160168647766113,
1304
+ "rewards/margins": 7.251564025878906,
1305
+ "rewards/rejected": -9.86758041381836,
1306
+ "step": 780
1307
+ },
1308
+ {
1309
+ "epoch": 0.83,
1310
+ "grad_norm": 27.140797859113846,
1311
+ "learning_rate": 4.415420150605398e-08,
1312
+ "logits/chosen": -0.6300855278968811,
1313
+ "logits/rejected": -0.1746017038822174,
1314
+ "logps/chosen": -400.11431884765625,
1315
+ "logps/rejected": -1069.177001953125,
1316
+ "loss": 0.1616,
1317
+ "rewards/accuracies": 0.875,
1318
+ "rewards/chosen": -2.7479443550109863,
1319
+ "rewards/margins": 7.171761989593506,
1320
+ "rewards/rejected": -9.919707298278809,
1321
+ "step": 790
1322
  },
1323
  {
1324
  "epoch": 0.84,
1325
+ "grad_norm": 20.145953031877735,
1326
+ "learning_rate": 3.9104390285376374e-08,
1327
+ "logits/chosen": -0.44818106293678284,
1328
+ "logits/rejected": -0.035945743322372437,
1329
+ "logps/chosen": -396.0670471191406,
1330
+ "logps/rejected": -1147.34619140625,
1331
+ "loss": 0.1433,
1332
+ "rewards/accuracies": 0.9125000238418579,
1333
+ "rewards/chosen": -2.6665611267089844,
1334
+ "rewards/margins": 8.064103126525879,
1335
+ "rewards/rejected": -10.730664253234863,
1336
+ "step": 800
1337
  },
1338
  {
1339
  "epoch": 0.84,
1340
+ "eval_logits/chosen": -0.6744760870933533,
1341
+ "eval_logits/rejected": -0.5140590667724609,
1342
+ "eval_logps/chosen": -1026.7318115234375,
1343
+ "eval_logps/rejected": -1180.040283203125,
1344
+ "eval_loss": 1.9049861431121826,
1345
+ "eval_rewards/accuracies": 0.6171875,
1346
+ "eval_rewards/chosen": -7.149553298950195,
1347
+ "eval_rewards/margins": 1.4755982160568237,
1348
+ "eval_rewards/rejected": -8.625151634216309,
1349
+ "eval_runtime": 97.364,
1350
+ "eval_samples_per_second": 20.541,
1351
+ "eval_steps_per_second": 0.329,
1352
+ "step": 800
1353
+ },
1354
+ {
1355
+ "epoch": 0.85,
1356
+ "grad_norm": 21.539181167828147,
1357
+ "learning_rate": 3.433663324986208e-08,
1358
+ "logits/chosen": -0.5423880815505981,
1359
+ "logits/rejected": -0.08916589617729187,
1360
+ "logps/chosen": -378.41876220703125,
1361
+ "logps/rejected": -1105.344970703125,
1362
+ "loss": 0.156,
1363
+ "rewards/accuracies": 0.9125000238418579,
1364
+ "rewards/chosen": -2.467062473297119,
1365
+ "rewards/margins": 7.7889909744262695,
1366
+ "rewards/rejected": -10.256052017211914,
1367
+ "step": 810
1368
  },
1369
  {
1370
  "epoch": 0.86,
1371
+ "grad_norm": 25.60967931137277,
1372
+ "learning_rate": 2.9857306851953897e-08,
1373
+ "logits/chosen": -0.5412378907203674,
1374
+ "logits/rejected": -0.05382275581359863,
1375
+ "logps/chosen": -373.0191345214844,
1376
+ "logps/rejected": -1065.0948486328125,
1377
+ "loss": 0.1524,
1378
+ "rewards/accuracies": 0.8812500238418579,
1379
+ "rewards/chosen": -2.4381911754608154,
1380
+ "rewards/margins": 7.450464725494385,
1381
+ "rewards/rejected": -9.888655662536621,
1382
+ "step": 820
1383
+ },
1384
+ {
1385
+ "epoch": 0.87,
1386
+ "grad_norm": 49.176125880409266,
1387
+ "learning_rate": 2.567240179368185e-08,
1388
+ "logits/chosen": -0.5491518378257751,
1389
+ "logits/rejected": 0.00046962351188994944,
1390
+ "logps/chosen": -376.9706115722656,
1391
+ "logps/rejected": -1090.8355712890625,
1392
+ "loss": 0.1429,
1393
+ "rewards/accuracies": 0.8812500238418579,
1394
+ "rewards/chosen": -2.460528612136841,
1395
+ "rewards/margins": 7.668572902679443,
1396
+ "rewards/rejected": -10.129101753234863,
1397
+ "step": 830
1398
  },
1399
  {
1400
  "epoch": 0.88,
1401
+ "grad_norm": 37.134305931663185,
1402
+ "learning_rate": 2.1787515014630357e-08,
1403
+ "logits/chosen": -0.8858461380004883,
1404
+ "logits/rejected": -0.30200493335723877,
1405
+ "logps/chosen": -372.07769775390625,
1406
+ "logps/rejected": -1134.0087890625,
1407
+ "loss": 0.1405,
1408
+ "rewards/accuracies": 0.9437500238418579,
1409
+ "rewards/chosen": -2.2380013465881348,
1410
+ "rewards/margins": 8.35414981842041,
1411
+ "rewards/rejected": -10.592150688171387,
1412
+ "step": 840
1413
+ },
1414
+ {
1415
+ "epoch": 0.89,
1416
+ "grad_norm": 44.332126805939915,
1417
+ "learning_rate": 1.820784220652766e-08,
1418
+ "logits/chosen": -0.44481903314590454,
1419
+ "logits/rejected": -0.00442737340927124,
1420
+ "logps/chosen": -393.5993957519531,
1421
+ "logps/rejected": -1024.645751953125,
1422
+ "loss": 0.1641,
1423
+ "rewards/accuracies": 0.8500000238418579,
1424
+ "rewards/chosen": -2.714064121246338,
1425
+ "rewards/margins": 6.799878120422363,
1426
+ "rewards/rejected": -9.513941764831543,
1427
+ "step": 850
1428
  },
1429
  {
1430
  "epoch": 0.9,
1431
+ "grad_norm": 27.781110281391676,
1432
+ "learning_rate": 1.4938170864468636e-08,
1433
+ "logits/chosen": -0.713314414024353,
1434
+ "logits/rejected": -0.28505033254623413,
1435
+ "logps/chosen": -372.9283447265625,
1436
+ "logps/rejected": -1127.688232421875,
1437
+ "loss": 0.151,
1438
+ "rewards/accuracies": 0.9437500238418579,
1439
+ "rewards/chosen": -2.429081678390503,
1440
+ "rewards/margins": 8.078619003295898,
1441
+ "rewards/rejected": -10.507699966430664,
1442
+ "step": 860
1443
+ },
1444
+ {
1445
+ "epoch": 0.91,
1446
+ "grad_norm": 40.33670958727325,
1447
+ "learning_rate": 1.1982873884064465e-08,
1448
+ "logits/chosen": -0.642200767993927,
1449
+ "logits/rejected": -0.2615371346473694,
1450
+ "logps/chosen": -382.0066223144531,
1451
+ "logps/rejected": -1205.7874755859375,
1452
+ "loss": 0.1425,
1453
+ "rewards/accuracies": 0.8999999761581421,
1454
+ "rewards/chosen": -2.4844939708709717,
1455
+ "rewards/margins": 8.8142728805542,
1456
+ "rewards/rejected": -11.298765182495117,
1457
+ "step": 870
1458
  },
1459
  {
1460
  "epoch": 0.92,
1461
+ "grad_norm": 41.06397869005653,
1462
+ "learning_rate": 9.345903713082304e-09,
1463
+ "logits/chosen": -0.6282533407211304,
1464
+ "logits/rejected": -0.19878247380256653,
1465
+ "logps/chosen": -400.76605224609375,
1466
+ "logps/rejected": -1277.481689453125,
1467
+ "loss": 0.1417,
1468
+ "rewards/accuracies": 0.9375,
1469
+ "rewards/chosen": -2.5621111392974854,
1470
+ "rewards/margins": 9.376588821411133,
1471
+ "rewards/rejected": -11.938699722290039,
1472
+ "step": 880
1473
+ },
1474
+ {
1475
+ "epoch": 0.93,
1476
+ "grad_norm": 59.83522561090434,
1477
+ "learning_rate": 7.030787065396865e-09,
1478
+ "logits/chosen": -0.5553107857704163,
1479
+ "logits/rejected": -0.15107165277004242,
1480
+ "logps/chosen": -375.3813781738281,
1481
+ "logps/rejected": -1131.446533203125,
1482
+ "loss": 0.1561,
1483
+ "rewards/accuracies": 0.875,
1484
+ "rewards/chosen": -2.4943184852600098,
1485
+ "rewards/margins": 8.074764251708984,
1486
+ "rewards/rejected": -10.56908130645752,
1487
+ "step": 890
1488
  },
1489
  {
1490
  "epoch": 0.94,
1491
+ "grad_norm": 20.123097535281502,
1492
+ "learning_rate": 5.04062020432286e-09,
1493
+ "logits/chosen": -0.5520753860473633,
1494
+ "logits/rejected": -0.19162021577358246,
1495
+ "logps/chosen": -359.1523742675781,
1496
+ "logps/rejected": -1050.653076171875,
1497
+ "loss": 0.1479,
1498
+ "rewards/accuracies": 0.8999999761581421,
1499
+ "rewards/chosen": -2.378551959991455,
1500
+ "rewards/margins": 7.357401371002197,
1501
+ "rewards/rejected": -9.735953330993652,
1502
+ "step": 900
1503
+ },
1504
+ {
1505
+ "epoch": 0.94,
1506
+ "eval_logits/chosen": -0.7198767066001892,
1507
+ "eval_logits/rejected": -0.5643453598022461,
1508
+ "eval_logps/chosen": -1010.4668579101562,
1509
+ "eval_logps/rejected": -1164.5386962890625,
1510
+ "eval_loss": 1.8978877067565918,
1511
+ "eval_rewards/accuracies": 0.609375,
1512
+ "eval_rewards/chosen": -6.986903190612793,
1513
+ "eval_rewards/margins": 1.483232855796814,
1514
+ "eval_rewards/rejected": -8.470136642456055,
1515
+ "eval_runtime": 97.3726,
1516
+ "eval_samples_per_second": 20.54,
1517
+ "eval_steps_per_second": 0.329,
1518
+ "step": 900
1519
+ },
1520
+ {
1521
+ "epoch": 0.95,
1522
+ "grad_norm": 27.829801809728036,
1523
+ "learning_rate": 3.3780648016376866e-09,
1524
+ "logits/chosen": -0.5426167249679565,
1525
+ "logits/rejected": -0.20517143607139587,
1526
+ "logps/chosen": -367.1676330566406,
1527
+ "logps/rejected": -1091.75390625,
1528
+ "loss": 0.1338,
1529
+ "rewards/accuracies": 0.9125000238418579,
1530
+ "rewards/chosen": -2.459343910217285,
1531
+ "rewards/margins": 7.6989898681640625,
1532
+ "rewards/rejected": -10.158334732055664,
1533
+ "step": 910
1534
  },
1535
  {
1536
  "epoch": 0.96,
1537
+ "grad_norm": 28.33961947218242,
1538
+ "learning_rate": 2.0453443778310766e-09,
1539
+ "logits/chosen": -0.6683856844902039,
1540
+ "logits/rejected": -0.24923817813396454,
1541
+ "logps/chosen": -397.66192626953125,
1542
+ "logps/rejected": -1160.3267822265625,
1543
+ "loss": 0.1607,
1544
+ "rewards/accuracies": 0.887499988079071,
1545
+ "rewards/chosen": -2.5613341331481934,
1546
+ "rewards/margins": 8.2199125289917,
1547
+ "rewards/rejected": -10.781246185302734,
1548
+ "step": 920
1549
+ },
1550
+ {
1551
+ "epoch": 0.97,
1552
+ "grad_norm": 31.245120668993923,
1553
+ "learning_rate": 1.0442413283435758e-09,
1554
+ "logits/chosen": -0.5892433524131775,
1555
+ "logits/rejected": -0.16971439123153687,
1556
+ "logps/chosen": -382.60394287109375,
1557
+ "logps/rejected": -1098.789306640625,
1558
+ "loss": 0.1559,
1559
+ "rewards/accuracies": 0.893750011920929,
1560
+ "rewards/chosen": -2.5811736583709717,
1561
+ "rewards/margins": 7.667700290679932,
1562
+ "rewards/rejected": -10.248873710632324,
1563
+ "step": 930
1564
  },
1565
  {
1566
  "epoch": 0.98,
1567
+ "grad_norm": 34.32124556738926,
1568
+ "learning_rate": 3.760945397705828e-10,
1569
+ "logits/chosen": -0.6785596013069153,
1570
+ "logits/rejected": -0.16107648611068726,
1571
+ "logps/chosen": -382.91876220703125,
1572
+ "logps/rejected": -1063.984375,
1573
+ "loss": 0.152,
1574
+ "rewards/accuracies": 0.875,
1575
+ "rewards/chosen": -2.6222147941589355,
1576
+ "rewards/margins": 7.256742000579834,
1577
+ "rewards/rejected": -9.878957748413086,
1578
+ "step": 940
1579
+ },
1580
+ {
1581
+ "epoch": 0.99,
1582
+ "grad_norm": 33.81539431852237,
1583
+ "learning_rate": 4.17975992204056e-11,
1584
+ "logits/chosen": -0.4908576011657715,
1585
+ "logits/rejected": -0.08039870113134384,
1586
+ "logps/chosen": -358.55047607421875,
1587
+ "logps/rejected": -1048.7579345703125,
1588
+ "loss": 0.1541,
1589
+ "rewards/accuracies": 0.90625,
1590
+ "rewards/chosen": -2.3112599849700928,
1591
+ "rewards/margins": 7.406725883483887,
1592
+ "rewards/rejected": -9.717985153198242,
1593
+ "step": 950
1594
  },
  {
  "epoch": 1.0,
+ "step": 955,
  "total_flos": 0.0,
+ "train_loss": 0.1961859940234279,
+ "train_runtime": 15468.9338,
+ "train_samples_per_second": 7.904,
+ "train_steps_per_second": 0.062
  }
  ],
  "logging_steps": 10,
+ "max_steps": 955,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5a0eaf9e5538cf0fdfc4e8fe8d9145b675dbaffdc612df10f426c4a87429492c
  size 6264
 
  version https://git-lfs.github.com/spec/v1
+ oid sha256:8eecd791d9666b4084eb7e99fb09071ac1531764dcbc88a460e7ab273a6a26f1
  size 6264