Training in progress, step 6000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +134 -3
- pytorch_model.bin +1 -1
- runs/Jun05_10-45-59_0a95bf9de5ac/events.out.tfevents.1685962630.0a95bf9de5ac.3272.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2000137067
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:546cd4e7ec947305a665edaa2e582e225d7b8301eaa78f72956ea9b0b6882be6
|
3 |
size 2000137067
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1002469625
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:938f724c0f91c737457d75986a81bb14510b0f67d394e6715914ee6427257f11
|
3 |
size 1002469625
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db097afe89ed3c3eb23a6fe209105077506d8139157678f90acd89f98273927d
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ace18575c64fb9061a4bdb4187294f04e31e9a65a2e4da680ca78aeef9f963e2
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -268,11 +268,142 @@
|
|
268 |
"eval_samples_per_second": 2.402,
|
269 |
"eval_steps_per_second": 0.301,
|
270 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
}
|
272 |
],
|
273 |
"max_steps": 11448,
|
274 |
"num_train_epochs": 8,
|
275 |
-
"total_flos":
|
276 |
"trial_name": null,
|
277 |
"trial_params": null
|
278 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.1928721174004195,
|
5 |
+
"global_step": 6000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
268 |
"eval_samples_per_second": 2.402,
|
269 |
"eval_steps_per_second": 0.301,
|
270 |
"step": 4000
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"epoch": 2.87,
|
274 |
+
"learning_rate": 4.105945462673223e-05,
|
275 |
+
"loss": 2.9079,
|
276 |
+
"step": 4100
|
277 |
+
},
|
278 |
+
{
|
279 |
+
"epoch": 2.94,
|
280 |
+
"learning_rate": 4.0500670540903e-05,
|
281 |
+
"loss": 2.8688,
|
282 |
+
"step": 4200
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 3.0,
|
286 |
+
"learning_rate": 3.994188645507376e-05,
|
287 |
+
"loss": 2.7398,
|
288 |
+
"step": 4300
|
289 |
+
},
|
290 |
+
{
|
291 |
+
"epoch": 3.07,
|
292 |
+
"learning_rate": 3.938310236924452e-05,
|
293 |
+
"loss": 2.5503,
|
294 |
+
"step": 4400
|
295 |
+
},
|
296 |
+
{
|
297 |
+
"epoch": 3.14,
|
298 |
+
"learning_rate": 3.882431828341529e-05,
|
299 |
+
"loss": 2.5121,
|
300 |
+
"step": 4500
|
301 |
+
},
|
302 |
+
{
|
303 |
+
"epoch": 3.21,
|
304 |
+
"learning_rate": 3.8265534197586055e-05,
|
305 |
+
"loss": 2.4625,
|
306 |
+
"step": 4600
|
307 |
+
},
|
308 |
+
{
|
309 |
+
"epoch": 3.28,
|
310 |
+
"learning_rate": 3.7706750111756815e-05,
|
311 |
+
"loss": 2.3833,
|
312 |
+
"step": 4700
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"epoch": 3.35,
|
316 |
+
"learning_rate": 3.714796602592758e-05,
|
317 |
+
"loss": 2.3133,
|
318 |
+
"step": 4800
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"epoch": 3.42,
|
322 |
+
"learning_rate": 3.658918194009835e-05,
|
323 |
+
"loss": 2.2152,
|
324 |
+
"step": 4900
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 3.49,
|
328 |
+
"learning_rate": 3.603039785426911e-05,
|
329 |
+
"loss": 2.2304,
|
330 |
+
"step": 5000
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"epoch": 3.56,
|
334 |
+
"learning_rate": 3.5471613768439874e-05,
|
335 |
+
"loss": 2.1563,
|
336 |
+
"step": 5100
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"epoch": 3.63,
|
340 |
+
"learning_rate": 3.491282968261064e-05,
|
341 |
+
"loss": 2.0991,
|
342 |
+
"step": 5200
|
343 |
+
},
|
344 |
+
{
|
345 |
+
"epoch": 3.7,
|
346 |
+
"learning_rate": 3.4354045596781406e-05,
|
347 |
+
"loss": 2.0702,
|
348 |
+
"step": 5300
|
349 |
+
},
|
350 |
+
{
|
351 |
+
"epoch": 3.77,
|
352 |
+
"learning_rate": 3.379526151095217e-05,
|
353 |
+
"loss": 1.9914,
|
354 |
+
"step": 5400
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"epoch": 3.84,
|
358 |
+
"learning_rate": 3.323647742512294e-05,
|
359 |
+
"loss": 1.9235,
|
360 |
+
"step": 5500
|
361 |
+
},
|
362 |
+
{
|
363 |
+
"epoch": 3.91,
|
364 |
+
"learning_rate": 3.26776933392937e-05,
|
365 |
+
"loss": 1.8922,
|
366 |
+
"step": 5600
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"epoch": 3.98,
|
370 |
+
"learning_rate": 3.2118909253464465e-05,
|
371 |
+
"loss": 1.8512,
|
372 |
+
"step": 5700
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"epoch": 4.05,
|
376 |
+
"learning_rate": 3.156012516763523e-05,
|
377 |
+
"loss": 1.6966,
|
378 |
+
"step": 5800
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"epoch": 4.12,
|
382 |
+
"learning_rate": 3.100134108180599e-05,
|
383 |
+
"loss": 1.6399,
|
384 |
+
"step": 5900
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"epoch": 4.19,
|
388 |
+
"learning_rate": 3.0442556995976757e-05,
|
389 |
+
"loss": 1.5982,
|
390 |
+
"step": 6000
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"epoch": 4.19,
|
394 |
+
"eval_bleu": 56.0747,
|
395 |
+
"eval_em": 0.0017,
|
396 |
+
"eval_gen_len": 43.9021,
|
397 |
+
"eval_loss": 1.7495189905166626,
|
398 |
+
"eval_runtime": 423.2589,
|
399 |
+
"eval_samples_per_second": 2.847,
|
400 |
+
"eval_steps_per_second": 0.357,
|
401 |
+
"step": 6000
|
402 |
}
|
403 |
],
|
404 |
"max_steps": 11448,
|
405 |
"num_train_epochs": 8,
|
406 |
+
"total_flos": 3958404255138432.0,
|
407 |
"trial_name": null,
|
408 |
"trial_params": null
|
409 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1002469625
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:938f724c0f91c737457d75986a81bb14510b0f67d394e6715914ee6427257f11
|
3 |
size 1002469625
|
runs/Jun05_10-45-59_0a95bf9de5ac/events.out.tfevents.1685962630.0a95bf9de5ac.3272.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a959ff46eca4f3545c8c05e80af7a2fe5e478b7b3102f1f41ccc3cfcf54b040
|
3 |
+
size 19210
|