Model save
Browse files- all_results.json +5 -5
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- runs/Jun21_02-39-47_n136-129-074/events.out.tfevents.1718908857.n136-129-074.3997378.0 +2 -2
- train_results.json +5 -5
- trainer_state.json +305 -230
- training_args.bin +1 -1
all_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples":
|
6 |
-
"train_samples_per_second": 8.
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.6121151776493344,
|
4 |
+
"train_runtime": 6995.9878,
|
5 |
+
"train_samples": 61134,
|
6 |
+
"train_samples_per_second": 8.738,
|
7 |
+
"train_steps_per_second": 0.034
|
8 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943162336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4cfba4b2bccd53078b6689bd3a353c0c6b8f8f63f7dd05990329ce2a4cd3dc2
|
3 |
size 4943162336
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6caee221211932c0d895c01ccf79dca18c9e4d8f596c8c6e5c470beb9f1daa58
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540516344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d173bc5999339b83eaf9e08e87fad34084acdec72038e115d114762c5c0c29b
|
3 |
size 4540516344
|
runs/Jun21_02-39-47_n136-129-074/events.out.tfevents.1718908857.n136-129-074.3997378.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4de53dba9eadb4306d14a928a1deee7e5bc77f0981b2d86995978293214f379f
|
3 |
+
size 21607
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples":
|
6 |
-
"train_samples_per_second": 8.
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.6121151776493344,
|
4 |
+
"train_runtime": 6995.9878,
|
5 |
+
"train_samples": 61134,
|
6 |
+
"train_samples_per_second": 8.738,
|
7 |
+
"train_steps_per_second": 0.034
|
8 |
}
|
trainer_state.json
CHANGED
@@ -3,20 +3,20 @@
|
|
3 |
"best_model_checkpoint": null,
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate": 2.
|
15 |
-
"logits/chosen": -0.
|
16 |
-
"logits/rejected": 0.
|
17 |
-
"logps/chosen": -
|
18 |
-
"logps/rejected": -
|
19 |
-
"loss": 0.
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
22 |
"rewards/margins": 0.0,
|
@@ -24,287 +24,362 @@
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
-
"epoch": 0.
|
28 |
-
"grad_norm":
|
29 |
-
"learning_rate": 2.
|
30 |
-
"logits/chosen": -0.
|
31 |
-
"logits/rejected": 0.
|
32 |
-
"logps/chosen": -
|
33 |
-
"logps/rejected": -
|
34 |
-
"loss": 0.
|
35 |
-
"rewards/accuracies": 0.
|
36 |
-
"rewards/chosen": -0.
|
37 |
-
"rewards/margins": 0.
|
38 |
-
"rewards/rejected": -0.
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
-
"epoch": 0.
|
43 |
-
"grad_norm":
|
44 |
-
"learning_rate": 4.
|
45 |
-
"logits/chosen": -0.
|
46 |
-
"logits/rejected": 0.
|
47 |
-
"logps/chosen": -
|
48 |
-
"logps/rejected": -
|
49 |
-
"loss": 0.
|
50 |
-
"rewards/accuracies": 0.
|
51 |
-
"rewards/chosen": -
|
52 |
-
"rewards/margins":
|
53 |
-
"rewards/rejected": -
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
-
"epoch": 0.
|
58 |
-
"grad_norm":
|
59 |
-
"learning_rate": 4.
|
60 |
-
"logits/chosen": -0.
|
61 |
-
"logits/rejected": 0.
|
62 |
-
"logps/chosen": -
|
63 |
-
"logps/rejected": -
|
64 |
-
"loss": 0.
|
65 |
"rewards/accuracies": 0.8062499761581421,
|
66 |
-
"rewards/chosen": -
|
67 |
-
"rewards/margins": 4.
|
68 |
-
"rewards/rejected": -
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
-
"epoch": 0.
|
73 |
-
"grad_norm":
|
74 |
-
"learning_rate": 4.
|
75 |
-
"logits/chosen": -0.
|
76 |
-
"logits/rejected": 0.
|
77 |
-
"logps/chosen": -
|
78 |
-
"logps/rejected": -
|
79 |
-
"loss": 0.
|
80 |
-
"rewards/accuracies": 0.
|
81 |
-
"rewards/chosen": -1.
|
82 |
-
"rewards/margins":
|
83 |
-
"rewards/rejected": -
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
-
"epoch": 0.
|
88 |
-
"grad_norm":
|
89 |
-
"learning_rate": 4.
|
90 |
-
"logits/chosen": -0.
|
91 |
-
"logits/rejected": 0.
|
92 |
-
"logps/chosen": -
|
93 |
-
"logps/rejected": -
|
94 |
-
"loss": 0.
|
95 |
-
"rewards/accuracies": 0.
|
96 |
-
"rewards/chosen": -0.
|
97 |
-
"rewards/margins":
|
98 |
-
"rewards/rejected": -
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
-
"epoch": 0.
|
103 |
-
"grad_norm":
|
104 |
-
"learning_rate": 4.
|
105 |
-
"logits/chosen": -0.
|
106 |
-
"logits/rejected": 0.
|
107 |
-
"logps/chosen": -
|
108 |
-
"logps/rejected": -
|
109 |
-
"loss": 0.
|
110 |
-
"rewards/accuracies": 0.
|
111 |
-
"rewards/chosen": -
|
112 |
-
"rewards/margins":
|
113 |
-
"rewards/rejected": -
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
-
"epoch": 0.
|
118 |
-
"grad_norm":
|
119 |
-
"learning_rate":
|
120 |
-
"logits/chosen": -0.
|
121 |
-
"logits/rejected": 0.
|
122 |
-
"logps/chosen": -
|
123 |
-
"logps/rejected": -
|
124 |
-
"loss": 0.
|
125 |
-
"rewards/accuracies": 0.
|
126 |
-
"rewards/chosen": -
|
127 |
-
"rewards/margins":
|
128 |
-
"rewards/rejected": -
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
-
"epoch": 0.
|
133 |
-
"grad_norm":
|
134 |
-
"learning_rate":
|
135 |
-
"logits/chosen": -0.
|
136 |
-
"logits/rejected": 0.
|
137 |
-
"logps/chosen": -
|
138 |
-
"logps/rejected": -
|
139 |
-
"loss": 0.
|
140 |
-
"rewards/accuracies": 0.
|
141 |
-
"rewards/chosen": -
|
142 |
-
"rewards/margins":
|
143 |
-
"rewards/rejected": -5.
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
-
"epoch": 0.
|
148 |
-
"grad_norm":
|
149 |
-
"learning_rate": 3.
|
150 |
-
"logits/chosen": -0.
|
151 |
-
"logits/rejected": 0.
|
152 |
-
"logps/chosen": -
|
153 |
-
"logps/rejected": -
|
154 |
-
"loss": 0.
|
155 |
-
"rewards/accuracies": 0.
|
156 |
-
"rewards/chosen": -
|
157 |
-
"rewards/margins":
|
158 |
-
"rewards/rejected": -
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
-
"epoch": 0.
|
163 |
-
"grad_norm":
|
164 |
-
"learning_rate":
|
165 |
-
"logits/chosen": -0.
|
166 |
-
"logits/rejected": 0.
|
167 |
-
"logps/chosen": -
|
168 |
-
"logps/rejected": -
|
169 |
-
"loss": 0.
|
170 |
-
"rewards/accuracies": 0.
|
171 |
-
"rewards/chosen": -1.
|
172 |
-
"rewards/margins":
|
173 |
-
"rewards/rejected": -
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
-
"epoch": 0.
|
178 |
-
"grad_norm":
|
179 |
-
"learning_rate":
|
180 |
-
"logits/chosen": 0.
|
181 |
-
"logits/rejected": 0.
|
182 |
-
"logps/chosen": -
|
183 |
-
"logps/rejected": -
|
184 |
-
"loss": 0.
|
185 |
-
"rewards/accuracies": 0.
|
186 |
-
"rewards/chosen": -
|
187 |
-
"rewards/margins":
|
188 |
-
"rewards/rejected": -6.
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
-
"epoch": 0.
|
193 |
-
"grad_norm":
|
194 |
-
"learning_rate":
|
195 |
-
"logits/chosen": -0.
|
196 |
-
"logits/rejected": 0.
|
197 |
-
"logps/chosen": -
|
198 |
-
"logps/rejected": -
|
199 |
-
"loss": 0.
|
200 |
-
"rewards/accuracies": 0.
|
201 |
-
"rewards/chosen": -
|
202 |
-
"rewards/margins":
|
203 |
-
"rewards/rejected": -
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
-
"epoch": 0.
|
208 |
-
"grad_norm":
|
209 |
-
"learning_rate":
|
210 |
-
"logits/chosen": -0.
|
211 |
-
"logits/rejected": 0.
|
212 |
-
"logps/chosen": -
|
213 |
-
"logps/rejected": -
|
214 |
-
"loss": 0.
|
215 |
-
"rewards/accuracies": 0.
|
216 |
-
"rewards/chosen": -1.
|
217 |
-
"rewards/margins":
|
218 |
-
"rewards/rejected": -5.
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
-
"epoch": 0.
|
223 |
-
"grad_norm":
|
224 |
-
"learning_rate":
|
225 |
-
"logits/chosen": -0.
|
226 |
-
"logits/rejected": 0.
|
227 |
-
"logps/chosen": -
|
228 |
-
"logps/rejected": -
|
229 |
-
"loss": 0.
|
230 |
-
"rewards/accuracies": 0.
|
231 |
-
"rewards/chosen": -
|
232 |
-
"rewards/margins":
|
233 |
-
"rewards/rejected": -6.
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
-
"epoch": 0.
|
238 |
-
"grad_norm":
|
239 |
-
"learning_rate":
|
240 |
-
"logits/chosen": -0.
|
241 |
-
"logits/rejected": 0.
|
242 |
-
"logps/chosen": -
|
243 |
-
"logps/rejected": -
|
244 |
-
"loss": 0.
|
245 |
-
"rewards/accuracies": 0.
|
246 |
-
"rewards/chosen": -
|
247 |
-
"rewards/margins":
|
248 |
-
"rewards/rejected": -
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
-
"epoch": 0.
|
253 |
-
"grad_norm":
|
254 |
-
"learning_rate":
|
255 |
-
"logits/chosen": -0.
|
256 |
-
"logits/rejected": 0.
|
257 |
-
"logps/chosen": -
|
258 |
-
"logps/rejected": -
|
259 |
-
"loss": 0.
|
260 |
-
"rewards/accuracies": 0.
|
261 |
-
"rewards/chosen": -
|
262 |
-
"rewards/margins":
|
263 |
-
"rewards/rejected": -
|
264 |
"step": 160
|
265 |
},
|
266 |
{
|
267 |
-
"epoch": 0.
|
268 |
-
"grad_norm":
|
269 |
-
"learning_rate": 1.
|
270 |
-
"logits/chosen": -0.
|
271 |
-
"logits/rejected": 0.
|
272 |
-
"logps/chosen": -
|
273 |
-
"logps/rejected": -
|
274 |
-
"loss": 0.
|
275 |
-
"rewards/accuracies": 0.
|
276 |
-
"rewards/chosen": -
|
277 |
-
"rewards/margins":
|
278 |
-
"rewards/rejected": -5.
|
279 |
"step": 170
|
280 |
},
|
281 |
{
|
282 |
-
"epoch": 0.
|
283 |
-
"grad_norm":
|
284 |
-
"learning_rate":
|
285 |
-
"logits/chosen": -0.
|
286 |
-
"logits/rejected": 0.
|
287 |
-
"logps/chosen": -
|
288 |
-
"logps/rejected": -
|
289 |
-
"loss": 0.
|
290 |
-
"rewards/accuracies": 0.
|
291 |
-
"rewards/chosen": -
|
292 |
-
"rewards/margins":
|
293 |
-
"rewards/rejected": -
|
294 |
"step": 180
|
295 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
{
|
297 |
"epoch": 1.0,
|
298 |
-
"step":
|
299 |
"total_flos": 0.0,
|
300 |
-
"train_loss": 0.
|
301 |
-
"train_runtime":
|
302 |
-
"train_samples_per_second": 8.
|
303 |
-
"train_steps_per_second": 0.
|
304 |
}
|
305 |
],
|
306 |
"logging_steps": 10,
|
307 |
-
"max_steps":
|
308 |
"num_input_tokens_seen": 0,
|
309 |
"num_train_epochs": 1,
|
310 |
"save_steps": 100,
|
|
|
3 |
"best_model_checkpoint": null,
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 239,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.0,
|
13 |
+
"grad_norm": 1228.6970114702256,
|
14 |
+
"learning_rate": 2.083333333333333e-08,
|
15 |
+
"logits/chosen": -0.5983926653862,
|
16 |
+
"logits/rejected": -0.3142164349555969,
|
17 |
+
"logps/chosen": -366.9183349609375,
|
18 |
+
"logps/rejected": -437.1544189453125,
|
19 |
+
"loss": 0.6931,
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
22 |
"rewards/margins": 0.0,
|
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
+
"epoch": 0.04,
|
28 |
+
"grad_norm": 550.4800163195696,
|
29 |
+
"learning_rate": 2.0833333333333333e-07,
|
30 |
+
"logits/chosen": -0.3194640576839447,
|
31 |
+
"logits/rejected": 0.544389009475708,
|
32 |
+
"logps/chosen": -343.9505920410156,
|
33 |
+
"logps/rejected": -398.5333557128906,
|
34 |
+
"loss": 0.6237,
|
35 |
+
"rewards/accuracies": 0.5520833134651184,
|
36 |
+
"rewards/chosen": -0.3209317922592163,
|
37 |
+
"rewards/margins": 0.3546760380268097,
|
38 |
+
"rewards/rejected": -0.6756078600883484,
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
+
"epoch": 0.08,
|
43 |
+
"grad_norm": 703.9089332225451,
|
44 |
+
"learning_rate": 4.1666666666666667e-07,
|
45 |
+
"logits/chosen": -0.16047191619873047,
|
46 |
+
"logits/rejected": 0.8960522413253784,
|
47 |
+
"logps/chosen": -363.82220458984375,
|
48 |
+
"logps/rejected": -391.59588623046875,
|
49 |
+
"loss": 0.7373,
|
50 |
+
"rewards/accuracies": 0.784375011920929,
|
51 |
+
"rewards/chosen": -4.046866416931152,
|
52 |
+
"rewards/margins": 3.9459426403045654,
|
53 |
+
"rewards/rejected": -7.992809295654297,
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
+
"epoch": 0.13,
|
58 |
+
"grad_norm": 552.799882218425,
|
59 |
+
"learning_rate": 4.990398100856366e-07,
|
60 |
+
"logits/chosen": -0.1726907640695572,
|
61 |
+
"logits/rejected": 0.5421887636184692,
|
62 |
+
"logps/chosen": -354.1240234375,
|
63 |
+
"logps/rejected": -437.666259765625,
|
64 |
+
"loss": 0.7316,
|
65 |
"rewards/accuracies": 0.8062499761581421,
|
66 |
+
"rewards/chosen": -2.8292102813720703,
|
67 |
+
"rewards/margins": 4.830966949462891,
|
68 |
+
"rewards/rejected": -7.660176753997803,
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
+
"epoch": 0.17,
|
73 |
+
"grad_norm": 533.313986686025,
|
74 |
+
"learning_rate": 4.931986719649298e-07,
|
75 |
+
"logits/chosen": -0.4836675524711609,
|
76 |
+
"logits/rejected": 0.30907225608825684,
|
77 |
+
"logps/chosen": -369.8108215332031,
|
78 |
+
"logps/rejected": -389.89508056640625,
|
79 |
+
"loss": 0.6541,
|
80 |
+
"rewards/accuracies": 0.7749999761581421,
|
81 |
+
"rewards/chosen": -1.1337063312530518,
|
82 |
+
"rewards/margins": 3.0231781005859375,
|
83 |
+
"rewards/rejected": -4.15688419342041,
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
+
"epoch": 0.21,
|
88 |
+
"grad_norm": 556.3373380132485,
|
89 |
+
"learning_rate": 4.821741763807186e-07,
|
90 |
+
"logits/chosen": -0.36255133152008057,
|
91 |
+
"logits/rejected": 0.5106185674667358,
|
92 |
+
"logps/chosen": -335.77191162109375,
|
93 |
+
"logps/rejected": -392.73406982421875,
|
94 |
+
"loss": 0.593,
|
95 |
+
"rewards/accuracies": 0.8218749761581421,
|
96 |
+
"rewards/chosen": -0.6605129837989807,
|
97 |
+
"rewards/margins": 3.997671604156494,
|
98 |
+
"rewards/rejected": -4.658184051513672,
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
+
"epoch": 0.25,
|
103 |
+
"grad_norm": 587.3266847652458,
|
104 |
+
"learning_rate": 4.662012913161997e-07,
|
105 |
+
"logits/chosen": -0.48477110266685486,
|
106 |
+
"logits/rejected": 0.4101050794124603,
|
107 |
+
"logps/chosen": -372.4800109863281,
|
108 |
+
"logps/rejected": -392.51702880859375,
|
109 |
+
"loss": 0.665,
|
110 |
+
"rewards/accuracies": 0.7593749761581421,
|
111 |
+
"rewards/chosen": -1.5650008916854858,
|
112 |
+
"rewards/margins": 3.357562303543091,
|
113 |
+
"rewards/rejected": -4.922562599182129,
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
+
"epoch": 0.29,
|
118 |
+
"grad_norm": 466.5579056975509,
|
119 |
+
"learning_rate": 4.456204510851956e-07,
|
120 |
+
"logits/chosen": -0.5015081167221069,
|
121 |
+
"logits/rejected": 0.16009679436683655,
|
122 |
+
"logps/chosen": -373.043212890625,
|
123 |
+
"logps/rejected": -423.36041259765625,
|
124 |
+
"loss": 0.5988,
|
125 |
+
"rewards/accuracies": 0.8125,
|
126 |
+
"rewards/chosen": -2.6556434631347656,
|
127 |
+
"rewards/margins": 3.99314546585083,
|
128 |
+
"rewards/rejected": -6.648789405822754,
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
+
"epoch": 0.33,
|
133 |
+
"grad_norm": 569.6628049875762,
|
134 |
+
"learning_rate": 4.2087030056579986e-07,
|
135 |
+
"logits/chosen": -0.6794461607933044,
|
136 |
+
"logits/rejected": 0.288917601108551,
|
137 |
+
"logps/chosen": -352.2886657714844,
|
138 |
+
"logps/rejected": -395.7070617675781,
|
139 |
+
"loss": 0.6693,
|
140 |
+
"rewards/accuracies": 0.7437499761581421,
|
141 |
+
"rewards/chosen": -2.3514175415039062,
|
142 |
+
"rewards/margins": 3.268317699432373,
|
143 |
+
"rewards/rejected": -5.619735240936279,
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
+
"epoch": 0.38,
|
148 |
+
"grad_norm": 514.8591353074631,
|
149 |
+
"learning_rate": 3.9247834624635404e-07,
|
150 |
+
"logits/chosen": -0.302684485912323,
|
151 |
+
"logits/rejected": 0.4978007674217224,
|
152 |
+
"logps/chosen": -331.9069519042969,
|
153 |
+
"logps/rejected": -362.5081787109375,
|
154 |
+
"loss": 0.6574,
|
155 |
+
"rewards/accuracies": 0.7749999761581421,
|
156 |
+
"rewards/chosen": -2.7024354934692383,
|
157 |
+
"rewards/margins": 3.1714892387390137,
|
158 |
+
"rewards/rejected": -5.873924732208252,
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
+
"epoch": 0.42,
|
163 |
+
"grad_norm": 552.3368165213882,
|
164 |
+
"learning_rate": 3.610497133404795e-07,
|
165 |
+
"logits/chosen": -0.3550896942615509,
|
166 |
+
"logits/rejected": 0.3170868754386902,
|
167 |
+
"logps/chosen": -339.351318359375,
|
168 |
+
"logps/rejected": -385.2740173339844,
|
169 |
+
"loss": 0.6296,
|
170 |
+
"rewards/accuracies": 0.765625,
|
171 |
+
"rewards/chosen": -1.6958658695220947,
|
172 |
+
"rewards/margins": 3.125446319580078,
|
173 |
+
"rewards/rejected": -4.821312427520752,
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
+
"epoch": 0.46,
|
178 |
+
"grad_norm": 538.8081508757055,
|
179 |
+
"learning_rate": 3.272542485937368e-07,
|
180 |
+
"logits/chosen": -0.3151847720146179,
|
181 |
+
"logits/rejected": 0.5818986296653748,
|
182 |
+
"logps/chosen": -359.85504150390625,
|
183 |
+
"logps/rejected": -380.2372741699219,
|
184 |
+
"loss": 0.6323,
|
185 |
+
"rewards/accuracies": 0.800000011920929,
|
186 |
+
"rewards/chosen": -2.4269652366638184,
|
187 |
+
"rewards/margins": 3.6542327404022217,
|
188 |
+
"rewards/rejected": -6.081197738647461,
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
+
"epoch": 0.5,
|
193 |
+
"grad_norm": 480.1882844894413,
|
194 |
+
"learning_rate": 2.9181224366319943e-07,
|
195 |
+
"logits/chosen": -0.3534928262233734,
|
196 |
+
"logits/rejected": 0.37930893898010254,
|
197 |
+
"logps/chosen": -339.82684326171875,
|
198 |
+
"logps/rejected": -385.6717834472656,
|
199 |
+
"loss": 0.6157,
|
200 |
+
"rewards/accuracies": 0.793749988079071,
|
201 |
+
"rewards/chosen": -3.0394375324249268,
|
202 |
+
"rewards/margins": 3.763584852218628,
|
203 |
+
"rewards/rejected": -6.803022861480713,
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
+
"epoch": 0.54,
|
208 |
+
"grad_norm": 440.0988758060072,
|
209 |
+
"learning_rate": 2.55479083351317e-07,
|
210 |
+
"logits/chosen": -0.4240172505378723,
|
211 |
+
"logits/rejected": 0.5124756097793579,
|
212 |
+
"logps/chosen": -373.0182189941406,
|
213 |
+
"logps/rejected": -388.32049560546875,
|
214 |
+
"loss": 0.6406,
|
215 |
+
"rewards/accuracies": 0.7875000238418579,
|
216 |
+
"rewards/chosen": -1.8788728713989258,
|
217 |
+
"rewards/margins": 3.37638521194458,
|
218 |
+
"rewards/rejected": -5.255258560180664,
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
+
"epoch": 0.59,
|
223 |
+
"grad_norm": 567.7119079444957,
|
224 |
+
"learning_rate": 2.19029145890313e-07,
|
225 |
+
"logits/chosen": -0.40219712257385254,
|
226 |
+
"logits/rejected": 0.43135371804237366,
|
227 |
+
"logps/chosen": -348.668701171875,
|
228 |
+
"logps/rejected": -383.73431396484375,
|
229 |
+
"loss": 0.6295,
|
230 |
+
"rewards/accuracies": 0.784375011920929,
|
231 |
+
"rewards/chosen": -2.540496826171875,
|
232 |
+
"rewards/margins": 3.5945935249328613,
|
233 |
+
"rewards/rejected": -6.135090351104736,
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
+
"epoch": 0.63,
|
238 |
+
"grad_norm": 580.9997311687505,
|
239 |
+
"learning_rate": 1.8323929841460178e-07,
|
240 |
+
"logits/chosen": -0.45832833647727966,
|
241 |
+
"logits/rejected": 0.4165743887424469,
|
242 |
+
"logps/chosen": -356.80035400390625,
|
243 |
+
"logps/rejected": -386.0637512207031,
|
244 |
+
"loss": 0.6079,
|
245 |
+
"rewards/accuracies": 0.765625,
|
246 |
+
"rewards/chosen": -2.680931568145752,
|
247 |
+
"rewards/margins": 3.39042592048645,
|
248 |
+
"rewards/rejected": -6.071357250213623,
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
+
"epoch": 0.67,
|
253 |
+
"grad_norm": 571.50548834413,
|
254 |
+
"learning_rate": 1.488723393865766e-07,
|
255 |
+
"logits/chosen": -0.2687947750091553,
|
256 |
+
"logits/rejected": 0.5257094502449036,
|
257 |
+
"logps/chosen": -372.47467041015625,
|
258 |
+
"logps/rejected": -392.5708923339844,
|
259 |
+
"loss": 0.5822,
|
260 |
+
"rewards/accuracies": 0.78125,
|
261 |
+
"rewards/chosen": -2.8401567935943604,
|
262 |
+
"rewards/margins": 3.3291401863098145,
|
263 |
+
"rewards/rejected": -6.169297218322754,
|
264 |
"step": 160
|
265 |
},
|
266 |
{
|
267 |
+
"epoch": 0.71,
|
268 |
+
"grad_norm": 635.1524720165668,
|
269 |
+
"learning_rate": 1.1666074087171627e-07,
|
270 |
+
"logits/chosen": -0.4828396439552307,
|
271 |
+
"logits/rejected": 0.25855886936187744,
|
272 |
+
"logps/chosen": -358.3913879394531,
|
273 |
+
"logps/rejected": -405.04833984375,
|
274 |
+
"loss": 0.5831,
|
275 |
+
"rewards/accuracies": 0.765625,
|
276 |
+
"rewards/chosen": -2.5792622566223145,
|
277 |
+
"rewards/margins": 3.4110665321350098,
|
278 |
+
"rewards/rejected": -5.990328788757324,
|
279 |
"step": 170
|
280 |
},
|
281 |
{
|
282 |
+
"epoch": 0.75,
|
283 |
+
"grad_norm": 475.58804797511704,
|
284 |
+
"learning_rate": 8.729103716819111e-08,
|
285 |
+
"logits/chosen": -0.4832540452480316,
|
286 |
+
"logits/rejected": 0.45813870429992676,
|
287 |
+
"logps/chosen": -360.1710510253906,
|
288 |
+
"logps/rejected": -385.29180908203125,
|
289 |
+
"loss": 0.5327,
|
290 |
+
"rewards/accuracies": 0.796875,
|
291 |
+
"rewards/chosen": -3.303698778152466,
|
292 |
+
"rewards/margins": 3.9217076301574707,
|
293 |
+
"rewards/rejected": -7.225406646728516,
|
294 |
"step": 180
|
295 |
},
|
296 |
+
{
|
297 |
+
"epoch": 0.79,
|
298 |
+
"grad_norm": 505.1938261302253,
|
299 |
+
"learning_rate": 6.138919252022435e-08,
|
300 |
+
"logits/chosen": -0.2969876229763031,
|
301 |
+
"logits/rejected": 0.3868524134159088,
|
302 |
+
"logps/chosen": -341.55084228515625,
|
303 |
+
"logps/rejected": -388.48858642578125,
|
304 |
+
"loss": 0.6088,
|
305 |
+
"rewards/accuracies": 0.815625011920929,
|
306 |
+
"rewards/chosen": -3.5507800579071045,
|
307 |
+
"rewards/margins": 3.956249713897705,
|
308 |
+
"rewards/rejected": -7.507031440734863,
|
309 |
+
"step": 190
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"epoch": 0.84,
|
313 |
+
"grad_norm": 538.707594486278,
|
314 |
+
"learning_rate": 3.9507259776993954e-08,
|
315 |
+
"logits/chosen": -0.4874555170536041,
|
316 |
+
"logits/rejected": 0.24319347739219666,
|
317 |
+
"logps/chosen": -368.9916687011719,
|
318 |
+
"logps/rejected": -413.5546875,
|
319 |
+
"loss": 0.5485,
|
320 |
+
"rewards/accuracies": 0.78125,
|
321 |
+
"rewards/chosen": -2.9248712062835693,
|
322 |
+
"rewards/margins": 3.447073459625244,
|
323 |
+
"rewards/rejected": -6.371943950653076,
|
324 |
+
"step": 200
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.88,
|
328 |
+
"grad_norm": 653.4659715305337,
|
329 |
+
"learning_rate": 2.2111614344599684e-08,
|
330 |
+
"logits/chosen": -0.5115900635719299,
|
331 |
+
"logits/rejected": 0.3286168575286865,
|
332 |
+
"logps/chosen": -377.30523681640625,
|
333 |
+
"logps/rejected": -402.25811767578125,
|
334 |
+
"loss": 0.5517,
|
335 |
+
"rewards/accuracies": 0.793749988079071,
|
336 |
+
"rewards/chosen": -2.5977110862731934,
|
337 |
+
"rewards/margins": 3.4149317741394043,
|
338 |
+
"rewards/rejected": -6.012642860412598,
|
339 |
+
"step": 210
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"epoch": 0.92,
|
343 |
+
"grad_norm": 509.6499303160426,
|
344 |
+
"learning_rate": 9.57301420397924e-09,
|
345 |
+
"logits/chosen": -0.5053508877754211,
|
346 |
+
"logits/rejected": 0.2792305648326874,
|
347 |
+
"logps/chosen": -361.4806213378906,
|
348 |
+
"logps/rejected": -397.5937805175781,
|
349 |
+
"loss": 0.5519,
|
350 |
+
"rewards/accuracies": 0.8031250238418579,
|
351 |
+
"rewards/chosen": -2.464998960494995,
|
352 |
+
"rewards/margins": 3.379429578781128,
|
353 |
+
"rewards/rejected": -5.844428062438965,
|
354 |
+
"step": 220
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"epoch": 0.96,
|
358 |
+
"grad_norm": 515.045513417983,
|
359 |
+
"learning_rate": 2.158697848236607e-09,
|
360 |
+
"logits/chosen": -0.4747482240200043,
|
361 |
+
"logits/rejected": 0.19420669972896576,
|
362 |
+
"logps/chosen": -361.44647216796875,
|
363 |
+
"logps/rejected": -409.26702880859375,
|
364 |
+
"loss": 0.5529,
|
365 |
+
"rewards/accuracies": 0.746874988079071,
|
366 |
+
"rewards/chosen": -2.540475368499756,
|
367 |
+
"rewards/margins": 3.0807271003723145,
|
368 |
+
"rewards/rejected": -5.62120246887207,
|
369 |
+
"step": 230
|
370 |
+
},
|
371 |
{
|
372 |
"epoch": 1.0,
|
373 |
+
"step": 239,
|
374 |
"total_flos": 0.0,
|
375 |
+
"train_loss": 0.6121151776493344,
|
376 |
+
"train_runtime": 6995.9878,
|
377 |
+
"train_samples_per_second": 8.738,
|
378 |
+
"train_steps_per_second": 0.034
|
379 |
}
|
380 |
],
|
381 |
"logging_steps": 10,
|
382 |
+
"max_steps": 239,
|
383 |
"num_input_tokens_seen": 0,
|
384 |
"num_train_epochs": 1,
|
385 |
"save_steps": 100,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6328
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fcf5c85f80c92a9eacc9ea3ac3046fce1e674dd5e2c6a60e50c6fe6b0e4a3a2
|
3 |
size 6328
|