Model save
Browse files- all_results.json +4 -4
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- runs/Jun18_02-36-18_n136-100-194/events.out.tfevents.1718649412.n136-100-194.2675446.0 +2 -2
- train_results.json +4 -4
- trainer_state.json +218 -218
- training_args.bin +2 -2
all_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples":
|
6 |
-
"train_samples_per_second": 8.
|
7 |
"train_steps_per_second": 0.034
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.5129612217778745,
|
4 |
+
"train_runtime": 5390.6556,
|
5 |
+
"train_samples": 47205,
|
6 |
+
"train_samples_per_second": 8.757,
|
7 |
"train_steps_per_second": 0.034
|
8 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943162336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cf429f2365d057d537eb6eb684ec0e28181ee38fa55cd21735e7e6f691bdff9
|
3 |
size 4943162336
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9331d412f5bc9c44c9792f8a2afaa5a28095041cc6f7c254ae83a5ed634d33d8
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540516344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43b920dd703fb81aa8790bea0c995d0448371190e6f8d23f5e71d685ce1bda1e
|
3 |
size 4540516344
|
runs/Jun18_02-36-18_n136-100-194/events.out.tfevents.1718649412.n136-100-194.2675446.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72febef4cdd2fc0e0c7f5f1f5d588ce85dd8f4850a1a3eb70382c76988f19607
|
3 |
+
size 18157
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples":
|
6 |
-
"train_samples_per_second": 8.
|
7 |
"train_steps_per_second": 0.034
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.5129612217778745,
|
4 |
+
"train_runtime": 5390.6556,
|
5 |
+
"train_samples": 47205,
|
6 |
+
"train_samples_per_second": 8.757,
|
7 |
"train_steps_per_second": 0.034
|
8 |
}
|
trainer_state.json
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.01,
|
13 |
-
"grad_norm":
|
14 |
"learning_rate": 2.6315789473684206e-09,
|
15 |
-
"logits/chosen": -0.
|
16 |
-
"logits/rejected": -0.
|
17 |
-
"logps/chosen": -
|
18 |
-
"logps/rejected": -
|
19 |
-
"loss": 0.
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
22 |
"rewards/margins": 0.0,
|
@@ -25,286 +25,286 @@
|
|
25 |
},
|
26 |
{
|
27 |
"epoch": 0.05,
|
28 |
-
"grad_norm":
|
29 |
"learning_rate": 2.6315789473684208e-08,
|
30 |
-
"logits/chosen": -0.
|
31 |
-
"logits/rejected": -0.
|
32 |
-
"logps/chosen": -
|
33 |
-
"logps/rejected": -
|
34 |
-
"loss": 0.
|
35 |
-
"rewards/accuracies": 0.
|
36 |
-
"rewards/chosen": 0.
|
37 |
-
"rewards/margins":
|
38 |
-
"rewards/rejected": 0.
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
"epoch": 0.11,
|
43 |
-
"grad_norm":
|
44 |
-
"learning_rate": 4.
|
45 |
-
"logits/chosen": -0.
|
46 |
-
"logits/rejected": -0.
|
47 |
-
"logps/chosen": -
|
48 |
-
"logps/rejected": -
|
49 |
-
"loss": 0.
|
50 |
-
"rewards/accuracies": 0.
|
51 |
-
"rewards/chosen":
|
52 |
-
"rewards/margins": 0.
|
53 |
-
"rewards/rejected":
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
"epoch": 0.16,
|
58 |
-
"grad_norm":
|
59 |
-
"learning_rate": 4.
|
60 |
-
"logits/chosen": -0.
|
61 |
-
"logits/rejected": -0.
|
62 |
-
"logps/chosen": -
|
63 |
-
"logps/rejected": -
|
64 |
-
"loss": 0.
|
65 |
-
"rewards/accuracies": 0.
|
66 |
-
"rewards/chosen":
|
67 |
-
"rewards/margins": 0.
|
68 |
-
"rewards/rejected": -0.
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
"epoch": 0.22,
|
73 |
-
"grad_norm":
|
74 |
-
"learning_rate": 4.
|
75 |
-
"logits/chosen": -0.
|
76 |
-
"logits/rejected": -0.
|
77 |
-
"logps/chosen": -
|
78 |
-
"logps/rejected": -
|
79 |
-
"loss": 0.
|
80 |
-
"rewards/accuracies": 0.
|
81 |
-
"rewards/chosen": 0.
|
82 |
-
"rewards/margins": 0.
|
83 |
-
"rewards/rejected": -0.
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
"epoch": 0.27,
|
88 |
-
"grad_norm":
|
89 |
-
"learning_rate": 4.
|
90 |
-
"logits/chosen": -0.
|
91 |
-
"logits/rejected": -0.
|
92 |
-
"logps/chosen": -
|
93 |
-
"logps/rejected": -
|
94 |
-
"loss": 0.
|
95 |
-
"rewards/accuracies": 0.
|
96 |
-
"rewards/chosen": 0.
|
97 |
-
"rewards/margins": 0.
|
98 |
-
"rewards/rejected": -0.
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
"epoch": 0.33,
|
103 |
-
"grad_norm":
|
104 |
-
"learning_rate": 4.
|
105 |
-
"logits/chosen": -0.
|
106 |
-
"logits/rejected": -0.
|
107 |
-
"logps/chosen": -
|
108 |
-
"logps/rejected": -
|
109 |
-
"loss": 0.
|
110 |
"rewards/accuracies": 0.7718750238418579,
|
111 |
-
"rewards/chosen": 0.
|
112 |
-
"rewards/margins": 0.
|
113 |
-
"rewards/rejected": -0.
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
"epoch": 0.38,
|
118 |
-
"grad_norm":
|
119 |
-
"learning_rate": 3.
|
120 |
-
"logits/chosen": -
|
121 |
-
"logits/rejected": -0.
|
122 |
-
"logps/chosen": -
|
123 |
-
"logps/rejected": -
|
124 |
-
"loss": 0.
|
125 |
-
"rewards/accuracies": 0.
|
126 |
-
"rewards/chosen": 0.
|
127 |
-
"rewards/margins": 0.
|
128 |
-
"rewards/rejected": 0.
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
-
"epoch": 0.
|
133 |
-
"grad_norm":
|
134 |
-
"learning_rate": 3.
|
135 |
-
"logits/chosen": -0.
|
136 |
-
"logits/rejected": -0.
|
137 |
-
"logps/chosen": -
|
138 |
-
"logps/rejected": -
|
139 |
-
"loss": 0.
|
140 |
-
"rewards/accuracies": 0.
|
141 |
-
"rewards/chosen": 0.
|
142 |
-
"rewards/margins": 0.
|
143 |
-
"rewards/rejected":
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
"epoch": 0.49,
|
148 |
-
"grad_norm":
|
149 |
-
"learning_rate": 3.
|
150 |
-
"logits/chosen": -
|
151 |
-
"logits/rejected": -0.
|
152 |
-
"logps/chosen": -
|
153 |
-
"logps/rejected": -
|
154 |
-
"loss": 0.
|
155 |
-
"rewards/accuracies": 0.
|
156 |
-
"rewards/chosen":
|
157 |
-
"rewards/margins":
|
158 |
-
"rewards/rejected":
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
-
"epoch": 0.
|
163 |
-
"grad_norm":
|
164 |
-
"learning_rate": 2.
|
165 |
-
"logits/chosen": -0.
|
166 |
-
"logits/rejected": -0.
|
167 |
-
"logps/chosen": -
|
168 |
-
"logps/rejected": -
|
169 |
-
"loss": 0.
|
170 |
-
"rewards/accuracies": 0.
|
171 |
-
"rewards/chosen":
|
172 |
-
"rewards/margins":
|
173 |
-
"rewards/rejected":
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
"epoch": 0.6,
|
178 |
-
"grad_norm":
|
179 |
-
"learning_rate": 2.
|
180 |
-
"logits/chosen": -0.
|
181 |
-
"logits/rejected": -0.
|
182 |
-
"logps/chosen": -
|
183 |
-
"logps/rejected": -
|
184 |
-
"loss": 0.
|
185 |
-
"rewards/accuracies": 0.
|
186 |
-
"rewards/chosen":
|
187 |
-
"rewards/margins": 1.
|
188 |
-
"rewards/rejected": -0.
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
-
"epoch": 0.
|
193 |
-
"grad_norm":
|
194 |
-
"learning_rate": 1.
|
195 |
-
"logits/chosen": -0.
|
196 |
-
"logits/rejected": -0.
|
197 |
-
"logps/chosen": -
|
198 |
-
"logps/rejected": -
|
199 |
-
"loss": 0.
|
200 |
-
"rewards/accuracies": 0.
|
201 |
-
"rewards/chosen": 1.
|
202 |
-
"rewards/margins": 1.
|
203 |
-
"rewards/rejected":
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
-
"epoch": 0.
|
208 |
-
"grad_norm":
|
209 |
-
"learning_rate": 1.
|
210 |
-
"logits/chosen": -0.
|
211 |
-
"logits/rejected": -0.
|
212 |
-
"logps/chosen": -
|
213 |
-
"logps/rejected": -
|
214 |
-
"loss": 0.
|
215 |
-
"rewards/accuracies": 0.
|
216 |
-
"rewards/chosen":
|
217 |
-
"rewards/margins":
|
218 |
-
"rewards/rejected":
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
-
"epoch": 0.
|
223 |
-
"grad_norm":
|
224 |
-
"learning_rate":
|
225 |
-
"logits/chosen": -
|
226 |
-
"logits/rejected": -0.
|
227 |
-
"logps/chosen": -
|
228 |
-
"logps/rejected": -
|
229 |
-
"loss": 0.
|
230 |
-
"rewards/accuracies": 0.
|
231 |
-
"rewards/chosen":
|
232 |
-
"rewards/margins": 1.
|
233 |
-
"rewards/rejected":
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
-
"epoch": 0.
|
238 |
-
"grad_norm":
|
239 |
-
"learning_rate":
|
240 |
-
"logits/chosen": -0.
|
241 |
-
"logits/rejected": -0.
|
242 |
-
"logps/chosen": -
|
243 |
-
"logps/rejected": -
|
244 |
-
"loss": 0.
|
245 |
-
"rewards/accuracies": 0.
|
246 |
-
"rewards/chosen":
|
247 |
-
"rewards/margins": 1.
|
248 |
-
"rewards/rejected": -0.
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
-
"epoch": 0.
|
253 |
-
"grad_norm":
|
254 |
-
"learning_rate": 2.
|
255 |
-
"logits/chosen": -0.
|
256 |
-
"logits/rejected": -0.
|
257 |
-
"logps/chosen": -
|
258 |
-
"logps/rejected": -
|
259 |
-
"loss": 0.
|
260 |
-
"rewards/accuracies": 0.
|
261 |
-
"rewards/chosen": 1.
|
262 |
-
"rewards/margins": 1.
|
263 |
-
"rewards/rejected": -0.
|
264 |
"step": 160
|
265 |
},
|
266 |
{
|
267 |
-
"epoch": 0.
|
268 |
-
"grad_norm":
|
269 |
-
"learning_rate":
|
270 |
-
"logits/chosen": -0.
|
271 |
-
"logits/rejected": -0.
|
272 |
-
"logps/chosen": -
|
273 |
-
"logps/rejected": -
|
274 |
-
"loss": 0.
|
275 |
-
"rewards/accuracies": 0.
|
276 |
-
"rewards/chosen": 1.
|
277 |
-
"rewards/margins": 1.
|
278 |
-
"rewards/rejected": -0.
|
279 |
"step": 170
|
280 |
},
|
281 |
{
|
282 |
-
"epoch": 0.
|
283 |
-
"grad_norm":
|
284 |
-
"learning_rate":
|
285 |
-
"logits/chosen": -0.
|
286 |
-
"logits/rejected": -0.
|
287 |
-
"logps/chosen": -
|
288 |
-
"logps/rejected": -
|
289 |
-
"loss": 0.
|
290 |
-
"rewards/accuracies": 0.
|
291 |
-
"rewards/chosen":
|
292 |
-
"rewards/margins": 1.
|
293 |
-
"rewards/rejected": -0.
|
294 |
"step": 180
|
295 |
},
|
296 |
{
|
297 |
"epoch": 1.0,
|
298 |
-
"step":
|
299 |
"total_flos": 0.0,
|
300 |
-
"train_loss": 0.
|
301 |
-
"train_runtime":
|
302 |
-
"train_samples_per_second": 8.
|
303 |
"train_steps_per_second": 0.034
|
304 |
}
|
305 |
],
|
306 |
"logging_steps": 10,
|
307 |
-
"max_steps":
|
308 |
"num_input_tokens_seen": 0,
|
309 |
"num_train_epochs": 1,
|
310 |
"save_steps": 100,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.997289972899729,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 184,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.01,
|
13 |
+
"grad_norm": 699.419624450993,
|
14 |
"learning_rate": 2.6315789473684206e-09,
|
15 |
+
"logits/chosen": -0.8214728236198425,
|
16 |
+
"logits/rejected": -0.3221539855003357,
|
17 |
+
"logps/chosen": -281.03485107421875,
|
18 |
+
"logps/rejected": -258.44012451171875,
|
19 |
+
"loss": 0.6984,
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
22 |
"rewards/margins": 0.0,
|
|
|
25 |
},
|
26 |
{
|
27 |
"epoch": 0.05,
|
28 |
+
"grad_norm": 688.9664821722622,
|
29 |
"learning_rate": 2.6315789473684208e-08,
|
30 |
+
"logits/chosen": -0.8804507255554199,
|
31 |
+
"logits/rejected": -0.6175813674926758,
|
32 |
+
"logps/chosen": -333.9942932128906,
|
33 |
+
"logps/rejected": -301.1158142089844,
|
34 |
+
"loss": 0.7112,
|
35 |
+
"rewards/accuracies": 0.4583333432674408,
|
36 |
+
"rewards/chosen": 0.02290305122733116,
|
37 |
+
"rewards/margins": 0.000978956581093371,
|
38 |
+
"rewards/rejected": 0.0219240952283144,
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
"epoch": 0.11,
|
43 |
+
"grad_norm": 708.2483637617346,
|
44 |
+
"learning_rate": 4.9995468638088146e-08,
|
45 |
+
"logits/chosen": -0.8514911532402039,
|
46 |
+
"logits/rejected": -0.6387242078781128,
|
47 |
+
"logps/chosen": -341.757080078125,
|
48 |
+
"logps/rejected": -314.51751708984375,
|
49 |
+
"loss": 0.7023,
|
50 |
+
"rewards/accuracies": 0.4906249940395355,
|
51 |
+
"rewards/chosen": 0.009864812716841698,
|
52 |
+
"rewards/margins": 0.0014371216529980302,
|
53 |
+
"rewards/rejected": 0.008427691645920277,
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
"epoch": 0.16,
|
58 |
+
"grad_norm": 577.1248066355766,
|
59 |
+
"learning_rate": 4.945369001834514e-08,
|
60 |
+
"logits/chosen": -0.9569602012634277,
|
61 |
+
"logits/rejected": -0.7534269690513611,
|
62 |
+
"logps/chosen": -346.7052001953125,
|
63 |
+
"logps/rejected": -312.9559631347656,
|
64 |
+
"loss": 0.6348,
|
65 |
+
"rewards/accuracies": 0.596875011920929,
|
66 |
+
"rewards/chosen": 0.04675150662660599,
|
67 |
+
"rewards/margins": 0.11421873420476913,
|
68 |
+
"rewards/rejected": -0.06746722757816315,
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
"epoch": 0.22,
|
73 |
+
"grad_norm": 579.099443117778,
|
74 |
+
"learning_rate": 4.802809132787125e-08,
|
75 |
+
"logits/chosen": -0.9495086669921875,
|
76 |
+
"logits/rejected": -0.69568932056427,
|
77 |
+
"logps/chosen": -342.0091247558594,
|
78 |
+
"logps/rejected": -313.8953552246094,
|
79 |
+
"loss": 0.5992,
|
80 |
+
"rewards/accuracies": 0.715624988079071,
|
81 |
+
"rewards/chosen": 0.18988165259361267,
|
82 |
+
"rewards/margins": 0.39908966422080994,
|
83 |
+
"rewards/rejected": -0.20920801162719727,
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
"epoch": 0.27,
|
88 |
+
"grad_norm": 522.7924733724386,
|
89 |
+
"learning_rate": 4.577019741770136e-08,
|
90 |
+
"logits/chosen": -0.9217802882194519,
|
91 |
+
"logits/rejected": -0.5782603025436401,
|
92 |
+
"logps/chosen": -324.60467529296875,
|
93 |
+
"logps/rejected": -291.2610778808594,
|
94 |
+
"loss": 0.5445,
|
95 |
+
"rewards/accuracies": 0.734375,
|
96 |
+
"rewards/chosen": 0.451026976108551,
|
97 |
+
"rewards/margins": 0.6606618762016296,
|
98 |
+
"rewards/rejected": -0.209634929895401,
|
99 |
"step": 50
|
100 |
},
|
101 |
{
|
102 |
"epoch": 0.33,
|
103 |
+
"grad_norm": 441.70974291440746,
|
104 |
+
"learning_rate": 4.276161445663422e-08,
|
105 |
+
"logits/chosen": -0.9271470308303833,
|
106 |
+
"logits/rejected": -0.5116554498672485,
|
107 |
+
"logps/chosen": -351.0398864746094,
|
108 |
+
"logps/rejected": -313.02984619140625,
|
109 |
+
"loss": 0.5198,
|
110 |
"rewards/accuracies": 0.7718750238418579,
|
111 |
+
"rewards/chosen": 0.6475666761398315,
|
112 |
+
"rewards/margins": 0.7143425941467285,
|
113 |
+
"rewards/rejected": -0.06677587330341339,
|
114 |
"step": 60
|
115 |
},
|
116 |
{
|
117 |
"epoch": 0.38,
|
118 |
+
"grad_norm": 542.2213130459819,
|
119 |
+
"learning_rate": 3.9111080471669236e-08,
|
120 |
+
"logits/chosen": -0.8212447166442871,
|
121 |
+
"logits/rejected": -0.4982244372367859,
|
122 |
+
"logps/chosen": -323.27349853515625,
|
123 |
+
"logps/rejected": -298.29718017578125,
|
124 |
+
"loss": 0.4963,
|
125 |
+
"rewards/accuracies": 0.7562500238418579,
|
126 |
+
"rewards/chosen": 0.8154565095901489,
|
127 |
+
"rewards/margins": 0.7821646928787231,
|
128 |
+
"rewards/rejected": 0.033291809260845184,
|
129 |
"step": 70
|
130 |
},
|
131 |
{
|
132 |
+
"epoch": 0.43,
|
133 |
+
"grad_norm": 474.1471440060756,
|
134 |
+
"learning_rate": 3.495053527239656e-08,
|
135 |
+
"logits/chosen": -0.853967547416687,
|
136 |
+
"logits/rejected": -0.506403923034668,
|
137 |
+
"logps/chosen": -326.6103820800781,
|
138 |
+
"logps/rejected": -298.58380126953125,
|
139 |
+
"loss": 0.4837,
|
140 |
+
"rewards/accuracies": 0.75,
|
141 |
+
"rewards/chosen": 0.8894384503364563,
|
142 |
+
"rewards/margins": 0.8635719418525696,
|
143 |
+
"rewards/rejected": 0.025866415351629257,
|
144 |
"step": 80
|
145 |
},
|
146 |
{
|
147 |
"epoch": 0.49,
|
148 |
+
"grad_norm": 425.2259909957891,
|
149 |
+
"learning_rate": 3.0430351802512695e-08,
|
150 |
+
"logits/chosen": -0.9206312298774719,
|
151 |
+
"logits/rejected": -0.5899518728256226,
|
152 |
+
"logps/chosen": -332.1669921875,
|
153 |
+
"logps/rejected": -301.76629638671875,
|
154 |
+
"loss": 0.4805,
|
155 |
+
"rewards/accuracies": 0.768750011920929,
|
156 |
+
"rewards/chosen": 1.0642414093017578,
|
157 |
+
"rewards/margins": 0.9260485768318176,
|
158 |
+
"rewards/rejected": 0.13819284737110138,
|
159 |
"step": 90
|
160 |
},
|
161 |
{
|
162 |
+
"epoch": 0.54,
|
163 |
+
"grad_norm": 522.4112789139864,
|
164 |
+
"learning_rate": 2.5713901269842402e-08,
|
165 |
+
"logits/chosen": -0.9058634042739868,
|
166 |
+
"logits/rejected": -0.5568256974220276,
|
167 |
+
"logps/chosen": -339.79473876953125,
|
168 |
+
"logps/rejected": -303.04571533203125,
|
169 |
+
"loss": 0.4627,
|
170 |
+
"rewards/accuracies": 0.768750011920929,
|
171 |
+
"rewards/chosen": 1.063265085220337,
|
172 |
+
"rewards/margins": 1.0086718797683716,
|
173 |
+
"rewards/rejected": 0.05459331348538399,
|
174 |
"step": 100
|
175 |
},
|
176 |
{
|
177 |
"epoch": 0.6,
|
178 |
+
"grad_norm": 559.2014619266797,
|
179 |
+
"learning_rate": 2.09716484852284e-08,
|
180 |
+
"logits/chosen": -0.8277750015258789,
|
181 |
+
"logits/rejected": -0.4014664590358734,
|
182 |
+
"logps/chosen": -327.2757568359375,
|
183 |
+
"logps/rejected": -290.67767333984375,
|
184 |
+
"loss": 0.4654,
|
185 |
+
"rewards/accuracies": 0.784375011920929,
|
186 |
+
"rewards/chosen": 1.061645269393921,
|
187 |
+
"rewards/margins": 1.108440637588501,
|
188 |
+
"rewards/rejected": -0.04679515212774277,
|
189 |
"step": 110
|
190 |
},
|
191 |
{
|
192 |
+
"epoch": 0.65,
|
193 |
+
"grad_norm": 422.80636655795246,
|
194 |
+
"learning_rate": 1.637499082012574e-08,
|
195 |
+
"logits/chosen": -0.8766531944274902,
|
196 |
+
"logits/rejected": -0.6933692097663879,
|
197 |
+
"logps/chosen": -318.14697265625,
|
198 |
+
"logps/rejected": -298.24462890625,
|
199 |
+
"loss": 0.4535,
|
200 |
+
"rewards/accuracies": 0.7749999761581421,
|
201 |
+
"rewards/chosen": 1.206791639328003,
|
202 |
+
"rewards/margins": 1.0834404230117798,
|
203 |
+
"rewards/rejected": 0.12335111945867538,
|
204 |
"step": 120
|
205 |
},
|
206 |
{
|
207 |
+
"epoch": 0.7,
|
208 |
+
"grad_norm": 446.84426011615477,
|
209 |
+
"learning_rate": 1.2090063459025954e-08,
|
210 |
+
"logits/chosen": -0.9841381907463074,
|
211 |
+
"logits/rejected": -0.7362494468688965,
|
212 |
+
"logps/chosen": -345.0978088378906,
|
213 |
+
"logps/rejected": -313.47198486328125,
|
214 |
+
"loss": 0.4637,
|
215 |
+
"rewards/accuracies": 0.78125,
|
216 |
+
"rewards/chosen": 1.106555700302124,
|
217 |
+
"rewards/margins": 1.0541633367538452,
|
218 |
+
"rewards/rejected": 0.05239236354827881,
|
219 |
"step": 130
|
220 |
},
|
221 |
{
|
222 |
+
"epoch": 0.76,
|
223 |
+
"grad_norm": 437.6664172133176,
|
224 |
+
"learning_rate": 8.271734841028552e-09,
|
225 |
+
"logits/chosen": -0.9155263900756836,
|
226 |
+
"logits/rejected": -0.6956790685653687,
|
227 |
+
"logps/chosen": -343.3347473144531,
|
228 |
+
"logps/rejected": -319.8084411621094,
|
229 |
+
"loss": 0.4456,
|
230 |
+
"rewards/accuracies": 0.778124988079071,
|
231 |
+
"rewards/chosen": 1.1650675535202026,
|
232 |
+
"rewards/margins": 1.0745846033096313,
|
233 |
+
"rewards/rejected": 0.09048305451869965,
|
234 |
"step": 140
|
235 |
},
|
236 |
{
|
237 |
+
"epoch": 0.81,
|
238 |
+
"grad_norm": 547.6078246967138,
|
239 |
+
"learning_rate": 5.0580093109461184e-09,
|
240 |
+
"logits/chosen": -0.8691400289535522,
|
241 |
+
"logits/rejected": -0.42544013261795044,
|
242 |
+
"logps/chosen": -331.86328125,
|
243 |
+
"logps/rejected": -290.8024597167969,
|
244 |
+
"loss": 0.4532,
|
245 |
+
"rewards/accuracies": 0.7906249761581421,
|
246 |
+
"rewards/chosen": 1.2534332275390625,
|
247 |
+
"rewards/margins": 1.3035576343536377,
|
248 |
+
"rewards/rejected": -0.05012448504567146,
|
249 |
"step": 150
|
250 |
},
|
251 |
{
|
252 |
+
"epoch": 0.87,
|
253 |
+
"grad_norm": 491.3660284508518,
|
254 |
+
"learning_rate": 2.5650392827160444e-09,
|
255 |
+
"logits/chosen": -0.8275870084762573,
|
256 |
+
"logits/rejected": -0.5543009042739868,
|
257 |
+
"logps/chosen": -331.1053466796875,
|
258 |
+
"logps/rejected": -297.59521484375,
|
259 |
+
"loss": 0.4545,
|
260 |
+
"rewards/accuracies": 0.765625,
|
261 |
+
"rewards/chosen": 1.089099645614624,
|
262 |
+
"rewards/margins": 1.1679441928863525,
|
263 |
+
"rewards/rejected": -0.07884454727172852,
|
264 |
"step": 160
|
265 |
},
|
266 |
{
|
267 |
+
"epoch": 0.92,
|
268 |
+
"grad_norm": 411.0060600793946,
|
269 |
+
"learning_rate": 8.829271885286093e-10,
|
270 |
+
"logits/chosen": -0.9505836367607117,
|
271 |
+
"logits/rejected": -0.7405703067779541,
|
272 |
+
"logps/chosen": -334.4549865722656,
|
273 |
+
"logps/rejected": -305.47021484375,
|
274 |
+
"loss": 0.4601,
|
275 |
+
"rewards/accuracies": 0.796875,
|
276 |
+
"rewards/chosen": 1.0208919048309326,
|
277 |
+
"rewards/margins": 1.136797308921814,
|
278 |
+
"rewards/rejected": -0.11590544134378433,
|
279 |
"step": 170
|
280 |
},
|
281 |
{
|
282 |
+
"epoch": 0.98,
|
283 |
+
"grad_norm": 459.96112806668134,
|
284 |
+
"learning_rate": 7.246894216806354e-11,
|
285 |
+
"logits/chosen": -0.7150970697402954,
|
286 |
+
"logits/rejected": -0.2916146218776703,
|
287 |
+
"logps/chosen": -316.0776062011719,
|
288 |
+
"logps/rejected": -275.6624450683594,
|
289 |
+
"loss": 0.4537,
|
290 |
+
"rewards/accuracies": 0.762499988079071,
|
291 |
+
"rewards/chosen": 1.0451656579971313,
|
292 |
+
"rewards/margins": 1.1178200244903564,
|
293 |
+
"rewards/rejected": -0.07265423238277435,
|
294 |
"step": 180
|
295 |
},
|
296 |
{
|
297 |
"epoch": 1.0,
|
298 |
+
"step": 184,
|
299 |
"total_flos": 0.0,
|
300 |
+
"train_loss": 0.5129612217778745,
|
301 |
+
"train_runtime": 5390.6556,
|
302 |
+
"train_samples_per_second": 8.757,
|
303 |
"train_steps_per_second": 0.034
|
304 |
}
|
305 |
],
|
306 |
"logging_steps": 10,
|
307 |
+
"max_steps": 184,
|
308 |
"num_input_tokens_seen": 0,
|
309 |
"num_train_epochs": 1,
|
310 |
"save_steps": 100,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ca21fcd79c3354846e41641aa75c59cd98e420df7941b233d35b073affbb313
|
3 |
+
size 6264
|