Training in progress, step 2560, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4725595416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a79993ca3bb2e40d715e49b6365049f27102e49dd8b3a9ce020c6ea5a9f9fe9
|
3 |
size 4725595416
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9179193343
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bf2691edb5f20acb6de9eb1f6120c2449bed48ca00eecc968c5be167084b7bb
|
3 |
size 9179193343
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c349c1691bbeda5a6b16abd459bd4b17c698c1ae8b87b93b48229ee14acd38e
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28472ecbb49d175fddb5467d2d36c375ce76e352a7c4d1642d73ecb32735946a
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric":
|
3 |
-
"best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 512,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -11,502 +11,102 @@
|
|
11 |
{
|
12 |
"epoch": 0.02,
|
13 |
"learning_rate": 9.962169351263485e-05,
|
14 |
-
"loss":
|
15 |
"step": 256
|
16 |
},
|
17 |
{
|
18 |
"epoch": 0.04,
|
19 |
"learning_rate": 9.92433870252697e-05,
|
20 |
-
"loss":
|
21 |
"step": 512
|
22 |
},
|
23 |
{
|
24 |
"epoch": 0.04,
|
25 |
-
"eval_loss":
|
26 |
-
"eval_runtime":
|
27 |
-
"eval_samples_per_second":
|
28 |
-
"eval_steps_per_second":
|
29 |
"step": 512
|
30 |
},
|
31 |
{
|
32 |
"epoch": 0.06,
|
33 |
"learning_rate": 9.886508053790455e-05,
|
34 |
-
"loss":
|
35 |
"step": 768
|
36 |
},
|
37 |
{
|
38 |
"epoch": 0.08,
|
39 |
"learning_rate": 9.848677405053938e-05,
|
40 |
-
"loss":
|
41 |
"step": 1024
|
42 |
},
|
43 |
{
|
44 |
"epoch": 0.08,
|
45 |
-
"eval_loss":
|
46 |
-
"eval_runtime":
|
47 |
-
"eval_samples_per_second":
|
48 |
-
"eval_steps_per_second":
|
49 |
"step": 1024
|
50 |
},
|
51 |
{
|
52 |
"epoch": 0.09,
|
53 |
"learning_rate": 9.810846756317423e-05,
|
54 |
-
"loss":
|
55 |
"step": 1280
|
56 |
},
|
57 |
{
|
58 |
"epoch": 0.11,
|
59 |
"learning_rate": 9.773016107580908e-05,
|
60 |
-
"loss":
|
61 |
"step": 1536
|
62 |
},
|
63 |
{
|
64 |
"epoch": 0.11,
|
65 |
-
"eval_loss":
|
66 |
-
"eval_runtime":
|
67 |
-
"eval_samples_per_second":
|
68 |
-
"eval_steps_per_second":
|
69 |
"step": 1536
|
70 |
},
|
71 |
{
|
72 |
"epoch": 0.13,
|
73 |
"learning_rate": 9.735185458844393e-05,
|
74 |
-
"loss":
|
75 |
"step": 1792
|
76 |
},
|
77 |
{
|
78 |
"epoch": 0.15,
|
79 |
"learning_rate": 9.697354810107877e-05,
|
80 |
-
"loss":
|
81 |
"step": 2048
|
82 |
},
|
83 |
{
|
84 |
"epoch": 0.15,
|
85 |
-
"eval_loss":
|
86 |
-
"eval_runtime":
|
87 |
-
"eval_samples_per_second":
|
88 |
-
"eval_steps_per_second":
|
89 |
"step": 2048
|
90 |
},
|
91 |
{
|
92 |
"epoch": 0.17,
|
93 |
"learning_rate": 9.659524161371362e-05,
|
94 |
-
"loss":
|
95 |
"step": 2304
|
96 |
},
|
97 |
{
|
98 |
"epoch": 0.19,
|
99 |
"learning_rate": 9.621693512634847e-05,
|
100 |
-
"loss":
|
101 |
"step": 2560
|
102 |
},
|
103 |
{
|
104 |
"epoch": 0.19,
|
105 |
-
"eval_loss":
|
106 |
-
"eval_runtime":
|
107 |
-
"eval_samples_per_second":
|
108 |
-
"eval_steps_per_second":
|
109 |
"step": 2560
|
110 |
-
},
|
111 |
-
{
|
112 |
-
"epoch": 0.21,
|
113 |
-
"learning_rate": 9.58386286389833e-05,
|
114 |
-
"loss": 20474.4043,
|
115 |
-
"step": 2816
|
116 |
-
},
|
117 |
-
{
|
118 |
-
"epoch": 0.23,
|
119 |
-
"learning_rate": 9.546032215161815e-05,
|
120 |
-
"loss": 18948.6875,
|
121 |
-
"step": 3072
|
122 |
-
},
|
123 |
-
{
|
124 |
-
"epoch": 0.23,
|
125 |
-
"eval_loss": 16196.150390625,
|
126 |
-
"eval_runtime": 62.7371,
|
127 |
-
"eval_samples_per_second": 54.163,
|
128 |
-
"eval_steps_per_second": 54.163,
|
129 |
-
"step": 3072
|
130 |
-
},
|
131 |
-
{
|
132 |
-
"epoch": 0.25,
|
133 |
-
"learning_rate": 9.5082015664253e-05,
|
134 |
-
"loss": 17958.9785,
|
135 |
-
"step": 3328
|
136 |
-
},
|
137 |
-
{
|
138 |
-
"epoch": 0.26,
|
139 |
-
"learning_rate": 9.470370917688785e-05,
|
140 |
-
"loss": 17018.3223,
|
141 |
-
"step": 3584
|
142 |
-
},
|
143 |
-
{
|
144 |
-
"epoch": 0.26,
|
145 |
-
"eval_loss": 15006.568359375,
|
146 |
-
"eval_runtime": 59.6957,
|
147 |
-
"eval_samples_per_second": 56.922,
|
148 |
-
"eval_steps_per_second": 56.922,
|
149 |
-
"step": 3584
|
150 |
-
},
|
151 |
-
{
|
152 |
-
"epoch": 0.28,
|
153 |
-
"learning_rate": 9.43254026895227e-05,
|
154 |
-
"loss": 16859.2793,
|
155 |
-
"step": 3840
|
156 |
-
},
|
157 |
-
{
|
158 |
-
"epoch": 0.3,
|
159 |
-
"learning_rate": 9.394709620215754e-05,
|
160 |
-
"loss": 16723.5449,
|
161 |
-
"step": 4096
|
162 |
-
},
|
163 |
-
{
|
164 |
-
"epoch": 0.3,
|
165 |
-
"eval_loss": 14789.91796875,
|
166 |
-
"eval_runtime": 59.5641,
|
167 |
-
"eval_samples_per_second": 57.048,
|
168 |
-
"eval_steps_per_second": 57.048,
|
169 |
-
"step": 4096
|
170 |
-
},
|
171 |
-
{
|
172 |
-
"epoch": 0.32,
|
173 |
-
"learning_rate": 9.356878971479238e-05,
|
174 |
-
"loss": 16488.6934,
|
175 |
-
"step": 4352
|
176 |
-
},
|
177 |
-
{
|
178 |
-
"epoch": 0.34,
|
179 |
-
"learning_rate": 9.319048322742722e-05,
|
180 |
-
"loss": 16159.4502,
|
181 |
-
"step": 4608
|
182 |
-
},
|
183 |
-
{
|
184 |
-
"epoch": 0.34,
|
185 |
-
"eval_loss": 14223.150390625,
|
186 |
-
"eval_runtime": 60.4382,
|
187 |
-
"eval_samples_per_second": 56.223,
|
188 |
-
"eval_steps_per_second": 56.223,
|
189 |
-
"step": 4608
|
190 |
-
},
|
191 |
-
{
|
192 |
-
"epoch": 0.36,
|
193 |
-
"learning_rate": 9.281217674006207e-05,
|
194 |
-
"loss": 16155.2217,
|
195 |
-
"step": 4864
|
196 |
-
},
|
197 |
-
{
|
198 |
-
"epoch": 0.38,
|
199 |
-
"learning_rate": 9.243387025269692e-05,
|
200 |
-
"loss": 16132.4766,
|
201 |
-
"step": 5120
|
202 |
-
},
|
203 |
-
{
|
204 |
-
"epoch": 0.38,
|
205 |
-
"eval_loss": 14493.603515625,
|
206 |
-
"eval_runtime": 61.3546,
|
207 |
-
"eval_samples_per_second": 55.383,
|
208 |
-
"eval_steps_per_second": 55.383,
|
209 |
-
"step": 5120
|
210 |
-
},
|
211 |
-
{
|
212 |
-
"epoch": 0.4,
|
213 |
-
"learning_rate": 9.205556376533177e-05,
|
214 |
-
"loss": 16020.0381,
|
215 |
-
"step": 5376
|
216 |
-
},
|
217 |
-
{
|
218 |
-
"epoch": 0.42,
|
219 |
-
"learning_rate": 9.167725727796661e-05,
|
220 |
-
"loss": 15904.9912,
|
221 |
-
"step": 5632
|
222 |
-
},
|
223 |
-
{
|
224 |
-
"epoch": 0.42,
|
225 |
-
"eval_loss": 14683.6728515625,
|
226 |
-
"eval_runtime": 62.3009,
|
227 |
-
"eval_samples_per_second": 54.542,
|
228 |
-
"eval_steps_per_second": 54.542,
|
229 |
-
"step": 5632
|
230 |
-
},
|
231 |
-
{
|
232 |
-
"epoch": 0.44,
|
233 |
-
"learning_rate": 9.129895079060146e-05,
|
234 |
-
"loss": 15820.0801,
|
235 |
-
"step": 5888
|
236 |
-
},
|
237 |
-
{
|
238 |
-
"epoch": 0.45,
|
239 |
-
"learning_rate": 9.09206443032363e-05,
|
240 |
-
"loss": 15531.0293,
|
241 |
-
"step": 6144
|
242 |
-
},
|
243 |
-
{
|
244 |
-
"epoch": 0.45,
|
245 |
-
"eval_loss": 14102.1162109375,
|
246 |
-
"eval_runtime": 59.6921,
|
247 |
-
"eval_samples_per_second": 56.925,
|
248 |
-
"eval_steps_per_second": 56.925,
|
249 |
-
"step": 6144
|
250 |
-
},
|
251 |
-
{
|
252 |
-
"epoch": 0.47,
|
253 |
-
"learning_rate": 9.054233781587114e-05,
|
254 |
-
"loss": 15656.3779,
|
255 |
-
"step": 6400
|
256 |
-
},
|
257 |
-
{
|
258 |
-
"epoch": 0.49,
|
259 |
-
"learning_rate": 9.016403132850599e-05,
|
260 |
-
"loss": 15575.4033,
|
261 |
-
"step": 6656
|
262 |
-
},
|
263 |
-
{
|
264 |
-
"epoch": 0.49,
|
265 |
-
"eval_loss": 13737.095703125,
|
266 |
-
"eval_runtime": 49.6762,
|
267 |
-
"eval_samples_per_second": 68.403,
|
268 |
-
"eval_steps_per_second": 68.403,
|
269 |
-
"step": 6656
|
270 |
-
},
|
271 |
-
{
|
272 |
-
"epoch": 0.51,
|
273 |
-
"learning_rate": 8.978572484114084e-05,
|
274 |
-
"loss": 15500.876,
|
275 |
-
"step": 6912
|
276 |
-
},
|
277 |
-
{
|
278 |
-
"epoch": 0.53,
|
279 |
-
"learning_rate": 8.940741835377569e-05,
|
280 |
-
"loss": 15452.5596,
|
281 |
-
"step": 7168
|
282 |
-
},
|
283 |
-
{
|
284 |
-
"epoch": 0.53,
|
285 |
-
"eval_loss": 14052.9873046875,
|
286 |
-
"eval_runtime": 49.0398,
|
287 |
-
"eval_samples_per_second": 69.291,
|
288 |
-
"eval_steps_per_second": 69.291,
|
289 |
-
"step": 7168
|
290 |
-
},
|
291 |
-
{
|
292 |
-
"epoch": 0.55,
|
293 |
-
"learning_rate": 8.902911186641053e-05,
|
294 |
-
"loss": 15443.3691,
|
295 |
-
"step": 7424
|
296 |
-
},
|
297 |
-
{
|
298 |
-
"epoch": 0.57,
|
299 |
-
"learning_rate": 8.865080537904538e-05,
|
300 |
-
"loss": 15420.5,
|
301 |
-
"step": 7680
|
302 |
-
},
|
303 |
-
{
|
304 |
-
"epoch": 0.57,
|
305 |
-
"eval_loss": 13470.451171875,
|
306 |
-
"eval_runtime": 48.9981,
|
307 |
-
"eval_samples_per_second": 69.35,
|
308 |
-
"eval_steps_per_second": 69.35,
|
309 |
-
"step": 7680
|
310 |
-
},
|
311 |
-
{
|
312 |
-
"epoch": 0.59,
|
313 |
-
"learning_rate": 8.827249889168022e-05,
|
314 |
-
"loss": 15402.9678,
|
315 |
-
"step": 7936
|
316 |
-
},
|
317 |
-
{
|
318 |
-
"epoch": 0.61,
|
319 |
-
"learning_rate": 8.789419240431506e-05,
|
320 |
-
"loss": 15077.2871,
|
321 |
-
"step": 8192
|
322 |
-
},
|
323 |
-
{
|
324 |
-
"epoch": 0.61,
|
325 |
-
"eval_loss": 13587.75,
|
326 |
-
"eval_runtime": 50.6149,
|
327 |
-
"eval_samples_per_second": 67.134,
|
328 |
-
"eval_steps_per_second": 67.134,
|
329 |
-
"step": 8192
|
330 |
-
},
|
331 |
-
{
|
332 |
-
"epoch": 0.62,
|
333 |
-
"learning_rate": 8.751588591694991e-05,
|
334 |
-
"loss": 14985.3994,
|
335 |
-
"step": 8448
|
336 |
-
},
|
337 |
-
{
|
338 |
-
"epoch": 0.64,
|
339 |
-
"learning_rate": 8.713757942958476e-05,
|
340 |
-
"loss": 15089.6094,
|
341 |
-
"step": 8704
|
342 |
-
},
|
343 |
-
{
|
344 |
-
"epoch": 0.64,
|
345 |
-
"eval_loss": 13345.4736328125,
|
346 |
-
"eval_runtime": 49.1305,
|
347 |
-
"eval_samples_per_second": 69.163,
|
348 |
-
"eval_steps_per_second": 69.163,
|
349 |
-
"step": 8704
|
350 |
-
},
|
351 |
-
{
|
352 |
-
"epoch": 0.66,
|
353 |
-
"learning_rate": 8.67592729422196e-05,
|
354 |
-
"loss": 15185.1504,
|
355 |
-
"step": 8960
|
356 |
-
},
|
357 |
-
{
|
358 |
-
"epoch": 0.68,
|
359 |
-
"learning_rate": 8.638096645485444e-05,
|
360 |
-
"loss": 15208.8115,
|
361 |
-
"step": 9216
|
362 |
-
},
|
363 |
-
{
|
364 |
-
"epoch": 0.68,
|
365 |
-
"eval_loss": 13608.46875,
|
366 |
-
"eval_runtime": 49.1786,
|
367 |
-
"eval_samples_per_second": 69.095,
|
368 |
-
"eval_steps_per_second": 69.095,
|
369 |
-
"step": 9216
|
370 |
-
},
|
371 |
-
{
|
372 |
-
"epoch": 0.7,
|
373 |
-
"learning_rate": 8.600265996748929e-05,
|
374 |
-
"loss": 15028.958,
|
375 |
-
"step": 9472
|
376 |
-
},
|
377 |
-
{
|
378 |
-
"epoch": 0.72,
|
379 |
-
"learning_rate": 8.562435348012414e-05,
|
380 |
-
"loss": 14966.6143,
|
381 |
-
"step": 9728
|
382 |
-
},
|
383 |
-
{
|
384 |
-
"epoch": 0.72,
|
385 |
-
"eval_loss": 12910.7265625,
|
386 |
-
"eval_runtime": 57.0213,
|
387 |
-
"eval_samples_per_second": 59.592,
|
388 |
-
"eval_steps_per_second": 59.592,
|
389 |
-
"step": 9728
|
390 |
-
},
|
391 |
-
{
|
392 |
-
"epoch": 0.74,
|
393 |
-
"learning_rate": 8.524604699275897e-05,
|
394 |
-
"loss": 14777.6895,
|
395 |
-
"step": 9984
|
396 |
-
},
|
397 |
-
{
|
398 |
-
"epoch": 0.76,
|
399 |
-
"learning_rate": 8.486774050539382e-05,
|
400 |
-
"loss": 14924.749,
|
401 |
-
"step": 10240
|
402 |
-
},
|
403 |
-
{
|
404 |
-
"epoch": 0.76,
|
405 |
-
"eval_loss": 13132.9765625,
|
406 |
-
"eval_runtime": 49.0092,
|
407 |
-
"eval_samples_per_second": 69.334,
|
408 |
-
"eval_steps_per_second": 69.334,
|
409 |
-
"step": 10240
|
410 |
-
},
|
411 |
-
{
|
412 |
-
"epoch": 0.78,
|
413 |
-
"learning_rate": 8.448943401802867e-05,
|
414 |
-
"loss": 14694.0498,
|
415 |
-
"step": 10496
|
416 |
-
},
|
417 |
-
{
|
418 |
-
"epoch": 0.79,
|
419 |
-
"learning_rate": 8.411112753066351e-05,
|
420 |
-
"loss": 14639.1904,
|
421 |
-
"step": 10752
|
422 |
-
},
|
423 |
-
{
|
424 |
-
"epoch": 0.79,
|
425 |
-
"eval_loss": 13389.201171875,
|
426 |
-
"eval_runtime": 53.5263,
|
427 |
-
"eval_samples_per_second": 63.483,
|
428 |
-
"eval_steps_per_second": 63.483,
|
429 |
-
"step": 10752
|
430 |
-
},
|
431 |
-
{
|
432 |
-
"epoch": 0.81,
|
433 |
-
"learning_rate": 8.373282104329836e-05,
|
434 |
-
"loss": 14814.9424,
|
435 |
-
"step": 11008
|
436 |
-
},
|
437 |
-
{
|
438 |
-
"epoch": 0.83,
|
439 |
-
"learning_rate": 8.335451455593321e-05,
|
440 |
-
"loss": 14682.3984,
|
441 |
-
"step": 11264
|
442 |
-
},
|
443 |
-
{
|
444 |
-
"epoch": 0.83,
|
445 |
-
"eval_loss": 13232.8876953125,
|
446 |
-
"eval_runtime": 48.9422,
|
447 |
-
"eval_samples_per_second": 69.429,
|
448 |
-
"eval_steps_per_second": 69.429,
|
449 |
-
"step": 11264
|
450 |
-
},
|
451 |
-
{
|
452 |
-
"epoch": 0.85,
|
453 |
-
"learning_rate": 8.297620806856804e-05,
|
454 |
-
"loss": 14631.1221,
|
455 |
-
"step": 11520
|
456 |
-
},
|
457 |
-
{
|
458 |
-
"epoch": 0.87,
|
459 |
-
"learning_rate": 8.259790158120289e-05,
|
460 |
-
"loss": 14661.7021,
|
461 |
-
"step": 11776
|
462 |
-
},
|
463 |
-
{
|
464 |
-
"epoch": 0.87,
|
465 |
-
"eval_loss": 13184.365234375,
|
466 |
-
"eval_runtime": 49.4282,
|
467 |
-
"eval_samples_per_second": 68.746,
|
468 |
-
"eval_steps_per_second": 68.746,
|
469 |
-
"step": 11776
|
470 |
-
},
|
471 |
-
{
|
472 |
-
"epoch": 0.89,
|
473 |
-
"learning_rate": 8.221959509383774e-05,
|
474 |
-
"loss": 14565.9648,
|
475 |
-
"step": 12032
|
476 |
-
},
|
477 |
-
{
|
478 |
-
"epoch": 0.91,
|
479 |
-
"learning_rate": 8.184128860647259e-05,
|
480 |
-
"loss": 14608.0898,
|
481 |
-
"step": 12288
|
482 |
-
},
|
483 |
-
{
|
484 |
-
"epoch": 0.91,
|
485 |
-
"eval_loss": 13070.6923828125,
|
486 |
-
"eval_runtime": 49.197,
|
487 |
-
"eval_samples_per_second": 69.069,
|
488 |
-
"eval_steps_per_second": 69.069,
|
489 |
-
"step": 12288
|
490 |
-
},
|
491 |
-
{
|
492 |
-
"epoch": 0.93,
|
493 |
-
"learning_rate": 8.146298211910744e-05,
|
494 |
-
"loss": 14447.6699,
|
495 |
-
"step": 12544
|
496 |
-
},
|
497 |
-
{
|
498 |
-
"epoch": 0.95,
|
499 |
-
"learning_rate": 8.108467563174228e-05,
|
500 |
-
"loss": 14423.6045,
|
501 |
-
"step": 12800
|
502 |
-
},
|
503 |
-
{
|
504 |
-
"epoch": 0.95,
|
505 |
-
"eval_loss": 12849.525390625,
|
506 |
-
"eval_runtime": 48.9783,
|
507 |
-
"eval_samples_per_second": 69.378,
|
508 |
-
"eval_steps_per_second": 69.378,
|
509 |
-
"step": 12800
|
510 |
}
|
511 |
],
|
512 |
"logging_steps": 256,
|
|
|
1 |
{
|
2 |
+
"best_metric": 19020.044921875,
|
3 |
+
"best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-2560",
|
4 |
+
"epoch": 0.18914450899082907,
|
5 |
"eval_steps": 512,
|
6 |
+
"global_step": 2560,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
11 |
{
|
12 |
"epoch": 0.02,
|
13 |
"learning_rate": 9.962169351263485e-05,
|
14 |
+
"loss": 122076.6875,
|
15 |
"step": 256
|
16 |
},
|
17 |
{
|
18 |
"epoch": 0.04,
|
19 |
"learning_rate": 9.92433870252697e-05,
|
20 |
+
"loss": 75620.8047,
|
21 |
"step": 512
|
22 |
},
|
23 |
{
|
24 |
"epoch": 0.04,
|
25 |
+
"eval_loss": 67218.640625,
|
26 |
+
"eval_runtime": 49.08,
|
27 |
+
"eval_samples_per_second": 69.234,
|
28 |
+
"eval_steps_per_second": 69.234,
|
29 |
"step": 512
|
30 |
},
|
31 |
{
|
32 |
"epoch": 0.06,
|
33 |
"learning_rate": 9.886508053790455e-05,
|
34 |
+
"loss": 71143.4766,
|
35 |
"step": 768
|
36 |
},
|
37 |
{
|
38 |
"epoch": 0.08,
|
39 |
"learning_rate": 9.848677405053938e-05,
|
40 |
+
"loss": 65495.2617,
|
41 |
"step": 1024
|
42 |
},
|
43 |
{
|
44 |
"epoch": 0.08,
|
45 |
+
"eval_loss": 55127.15234375,
|
46 |
+
"eval_runtime": 57.5577,
|
47 |
+
"eval_samples_per_second": 59.036,
|
48 |
+
"eval_steps_per_second": 59.036,
|
49 |
"step": 1024
|
50 |
},
|
51 |
{
|
52 |
"epoch": 0.09,
|
53 |
"learning_rate": 9.810846756317423e-05,
|
54 |
+
"loss": 57857.6445,
|
55 |
"step": 1280
|
56 |
},
|
57 |
{
|
58 |
"epoch": 0.11,
|
59 |
"learning_rate": 9.773016107580908e-05,
|
60 |
+
"loss": 49931.2188,
|
61 |
"step": 1536
|
62 |
},
|
63 |
{
|
64 |
"epoch": 0.11,
|
65 |
+
"eval_loss": 44795.0859375,
|
66 |
+
"eval_runtime": 62.2532,
|
67 |
+
"eval_samples_per_second": 54.584,
|
68 |
+
"eval_steps_per_second": 54.584,
|
69 |
"step": 1536
|
70 |
},
|
71 |
{
|
72 |
"epoch": 0.13,
|
73 |
"learning_rate": 9.735185458844393e-05,
|
74 |
+
"loss": 42072.0,
|
75 |
"step": 1792
|
76 |
},
|
77 |
{
|
78 |
"epoch": 0.15,
|
79 |
"learning_rate": 9.697354810107877e-05,
|
80 |
+
"loss": 35028.5938,
|
81 |
"step": 2048
|
82 |
},
|
83 |
{
|
84 |
"epoch": 0.15,
|
85 |
+
"eval_loss": 29700.298828125,
|
86 |
+
"eval_runtime": 72.4329,
|
87 |
+
"eval_samples_per_second": 46.912,
|
88 |
+
"eval_steps_per_second": 46.912,
|
89 |
"step": 2048
|
90 |
},
|
91 |
{
|
92 |
"epoch": 0.17,
|
93 |
"learning_rate": 9.659524161371362e-05,
|
94 |
+
"loss": 27458.9082,
|
95 |
"step": 2304
|
96 |
},
|
97 |
{
|
98 |
"epoch": 0.19,
|
99 |
"learning_rate": 9.621693512634847e-05,
|
100 |
+
"loss": 21147.1016,
|
101 |
"step": 2560
|
102 |
},
|
103 |
{
|
104 |
"epoch": 0.19,
|
105 |
+
"eval_loss": 19020.044921875,
|
106 |
+
"eval_runtime": 49.0881,
|
107 |
+
"eval_samples_per_second": 69.222,
|
108 |
+
"eval_steps_per_second": 69.222,
|
109 |
"step": 2560
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
}
|
111 |
],
|
112 |
"logging_steps": 256,
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4271
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb40828a1d4ffeac31865af000ab0d03e5851c44943c5a9b31dbbf1ac5027a97
|
3 |
size 4271
|