Training in progress, step 980000
Browse files- config.json +1 -1
- last-checkpoint/config.json +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- training_args.bin +1 -1
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-
|
3 |
"architectures": [
|
4 |
"PIXELForPreTraining"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-970000",
|
3 |
"architectures": [
|
4 |
"PIXELForPreTraining"
|
5 |
],
|
last-checkpoint/config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-
|
3 |
"architectures": [
|
4 |
"PIXELForPreTraining"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-970000",
|
3 |
"architectures": [
|
4 |
"PIXELForPreTraining"
|
5 |
],
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa6e21575dd459731b96c75fb2eff44427788a2b21e2cba9f9983669023c697a
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:081e5eabe8ef9a2817820443cfba02d1a6ecee053832fff6fbfbe29c77150986
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1f60f9446cba0320cf9ced93c4b14816af8d6988d011f7cc2f5b01e8ada101d
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 10.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -19406,11 +19406,211 @@
|
|
19406 |
"eval_samples_per_second": 884.178,
|
19407 |
"eval_steps_per_second": 13.857,
|
19408 |
"step": 970000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19409 |
}
|
19410 |
],
|
19411 |
"max_steps": 1000000,
|
19412 |
"num_train_epochs": 12,
|
19413 |
-
"total_flos": 6.
|
19414 |
"trial_name": null,
|
19415 |
"trial_params": null
|
19416 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 10.929814973846515,
|
5 |
+
"global_step": 980000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
19406 |
"eval_samples_per_second": 884.178,
|
19407 |
"eval_steps_per_second": 13.857,
|
19408 |
"step": 970000
|
19409 |
+
},
|
19410 |
+
{
|
19411 |
+
"epoch": 10.82,
|
19412 |
+
"learning_rate": 1.0332828229586692e-05,
|
19413 |
+
"loss": 0.1799,
|
19414 |
+
"step": 970500
|
19415 |
+
},
|
19416 |
+
{
|
19417 |
+
"epoch": 10.83,
|
19418 |
+
"learning_rate": 1.032165010471157e-05,
|
19419 |
+
"loss": 0.1796,
|
19420 |
+
"step": 971000
|
19421 |
+
},
|
19422 |
+
{
|
19423 |
+
"epoch": 10.83,
|
19424 |
+
"eval_loss": 0.17119638621807098,
|
19425 |
+
"eval_runtime": 2.5911,
|
19426 |
+
"eval_samples_per_second": 886.512,
|
19427 |
+
"eval_steps_per_second": 13.894,
|
19428 |
+
"step": 971000
|
19429 |
+
},
|
19430 |
+
{
|
19431 |
+
"epoch": 10.84,
|
19432 |
+
"learning_rate": 1.0310662477784401e-05,
|
19433 |
+
"loss": 0.1804,
|
19434 |
+
"step": 971500
|
19435 |
+
},
|
19436 |
+
{
|
19437 |
+
"epoch": 10.84,
|
19438 |
+
"learning_rate": 1.0299865378844936e-05,
|
19439 |
+
"loss": 0.1798,
|
19440 |
+
"step": 972000
|
19441 |
+
},
|
19442 |
+
{
|
19443 |
+
"epoch": 10.84,
|
19444 |
+
"eval_loss": 0.1710081547498703,
|
19445 |
+
"eval_runtime": 2.5437,
|
19446 |
+
"eval_samples_per_second": 903.014,
|
19447 |
+
"eval_steps_per_second": 14.153,
|
19448 |
+
"step": 972000
|
19449 |
+
},
|
19450 |
+
{
|
19451 |
+
"epoch": 10.85,
|
19452 |
+
"learning_rate": 1.028925883741203e-05,
|
19453 |
+
"loss": 0.18,
|
19454 |
+
"step": 972500
|
19455 |
+
},
|
19456 |
+
{
|
19457 |
+
"epoch": 10.85,
|
19458 |
+
"learning_rate": 1.0278842882483569e-05,
|
19459 |
+
"loss": 0.1797,
|
19460 |
+
"step": 973000
|
19461 |
+
},
|
19462 |
+
{
|
19463 |
+
"epoch": 10.85,
|
19464 |
+
"eval_loss": 0.17146818339824677,
|
19465 |
+
"eval_runtime": 2.5692,
|
19466 |
+
"eval_samples_per_second": 894.045,
|
19467 |
+
"eval_steps_per_second": 14.012,
|
19468 |
+
"step": 973000
|
19469 |
+
},
|
19470 |
+
{
|
19471 |
+
"epoch": 10.86,
|
19472 |
+
"learning_rate": 1.026861754253637e-05,
|
19473 |
+
"loss": 0.1796,
|
19474 |
+
"step": 973500
|
19475 |
+
},
|
19476 |
+
{
|
19477 |
+
"epoch": 10.86,
|
19478 |
+
"learning_rate": 1.025858284552612e-05,
|
19479 |
+
"loss": 0.1797,
|
19480 |
+
"step": 974000
|
19481 |
+
},
|
19482 |
+
{
|
19483 |
+
"epoch": 10.86,
|
19484 |
+
"eval_loss": 0.1706797480583191,
|
19485 |
+
"eval_runtime": 2.6865,
|
19486 |
+
"eval_samples_per_second": 855.008,
|
19487 |
+
"eval_steps_per_second": 13.4,
|
19488 |
+
"step": 974000
|
19489 |
+
},
|
19490 |
+
{
|
19491 |
+
"epoch": 10.87,
|
19492 |
+
"learning_rate": 1.0248738818887307e-05,
|
19493 |
+
"loss": 0.1799,
|
19494 |
+
"step": 974500
|
19495 |
+
},
|
19496 |
+
{
|
19497 |
+
"epoch": 10.87,
|
19498 |
+
"learning_rate": 1.023908548953311e-05,
|
19499 |
+
"loss": 0.1799,
|
19500 |
+
"step": 975000
|
19501 |
+
},
|
19502 |
+
{
|
19503 |
+
"epoch": 10.87,
|
19504 |
+
"eval_loss": 0.1708817481994629,
|
19505 |
+
"eval_runtime": 2.5759,
|
19506 |
+
"eval_samples_per_second": 891.738,
|
19507 |
+
"eval_steps_per_second": 13.976,
|
19508 |
+
"step": 975000
|
19509 |
+
},
|
19510 |
+
{
|
19511 |
+
"epoch": 10.88,
|
19512 |
+
"learning_rate": 1.0229622883855378e-05,
|
19513 |
+
"loss": 0.1798,
|
19514 |
+
"step": 975500
|
19515 |
+
},
|
19516 |
+
{
|
19517 |
+
"epoch": 10.89,
|
19518 |
+
"learning_rate": 1.02203510277245e-05,
|
19519 |
+
"loss": 0.1796,
|
19520 |
+
"step": 976000
|
19521 |
+
},
|
19522 |
+
{
|
19523 |
+
"epoch": 10.89,
|
19524 |
+
"eval_loss": 0.1709393560886383,
|
19525 |
+
"eval_runtime": 2.6094,
|
19526 |
+
"eval_samples_per_second": 880.296,
|
19527 |
+
"eval_steps_per_second": 13.797,
|
19528 |
+
"step": 976000
|
19529 |
+
},
|
19530 |
+
{
|
19531 |
+
"epoch": 10.89,
|
19532 |
+
"learning_rate": 1.021126994648939e-05,
|
19533 |
+
"loss": 0.1801,
|
19534 |
+
"step": 976500
|
19535 |
+
},
|
19536 |
+
{
|
19537 |
+
"epoch": 10.9,
|
19538 |
+
"learning_rate": 1.0202379664977364e-05,
|
19539 |
+
"loss": 0.1799,
|
19540 |
+
"step": 977000
|
19541 |
+
},
|
19542 |
+
{
|
19543 |
+
"epoch": 10.9,
|
19544 |
+
"eval_loss": 0.17174768447875977,
|
19545 |
+
"eval_runtime": 2.6289,
|
19546 |
+
"eval_samples_per_second": 873.739,
|
19547 |
+
"eval_steps_per_second": 13.694,
|
19548 |
+
"step": 977000
|
19549 |
+
},
|
19550 |
+
{
|
19551 |
+
"epoch": 10.9,
|
19552 |
+
"learning_rate": 1.019368020749412e-05,
|
19553 |
+
"loss": 0.1797,
|
19554 |
+
"step": 977500
|
19555 |
+
},
|
19556 |
+
{
|
19557 |
+
"epoch": 10.91,
|
19558 |
+
"learning_rate": 1.018517159782365e-05,
|
19559 |
+
"loss": 0.1797,
|
19560 |
+
"step": 978000
|
19561 |
+
},
|
19562 |
+
{
|
19563 |
+
"epoch": 10.91,
|
19564 |
+
"eval_loss": 0.16800174117088318,
|
19565 |
+
"eval_runtime": 2.57,
|
19566 |
+
"eval_samples_per_second": 893.767,
|
19567 |
+
"eval_steps_per_second": 14.008,
|
19568 |
+
"step": 978000
|
19569 |
+
},
|
19570 |
+
{
|
19571 |
+
"epoch": 10.91,
|
19572 |
+
"learning_rate": 1.0176853859228149e-05,
|
19573 |
+
"loss": 0.1794,
|
19574 |
+
"step": 978500
|
19575 |
+
},
|
19576 |
+
{
|
19577 |
+
"epoch": 10.92,
|
19578 |
+
"learning_rate": 1.0168727014448004e-05,
|
19579 |
+
"loss": 0.1794,
|
19580 |
+
"step": 979000
|
19581 |
+
},
|
19582 |
+
{
|
19583 |
+
"epoch": 10.92,
|
19584 |
+
"eval_loss": 0.16953879594802856,
|
19585 |
+
"eval_runtime": 2.6173,
|
19586 |
+
"eval_samples_per_second": 877.629,
|
19587 |
+
"eval_steps_per_second": 13.755,
|
19588 |
+
"step": 979000
|
19589 |
+
},
|
19590 |
+
{
|
19591 |
+
"epoch": 10.92,
|
19592 |
+
"learning_rate": 1.0160791085701714e-05,
|
19593 |
+
"loss": 0.1798,
|
19594 |
+
"step": 979500
|
19595 |
+
},
|
19596 |
+
{
|
19597 |
+
"epoch": 10.93,
|
19598 |
+
"learning_rate": 1.0153046094685783e-05,
|
19599 |
+
"loss": 0.1794,
|
19600 |
+
"step": 980000
|
19601 |
+
},
|
19602 |
+
{
|
19603 |
+
"epoch": 10.93,
|
19604 |
+
"eval_loss": 0.1709355264902115,
|
19605 |
+
"eval_runtime": 2.587,
|
19606 |
+
"eval_samples_per_second": 887.915,
|
19607 |
+
"eval_steps_per_second": 13.916,
|
19608 |
+
"step": 980000
|
19609 |
}
|
19610 |
],
|
19611 |
"max_steps": 1000000,
|
19612 |
"num_train_epochs": 12,
|
19613 |
+
"total_flos": 6.869770816498864e+22,
|
19614 |
"trial_name": null,
|
19615 |
"trial_params": null
|
19616 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3311
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a854cf81d57a7e4747d79eeee0e792b9b0db2dfcccddbeaecfbfa4a0ff53eef
|
3 |
size 3311
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:081e5eabe8ef9a2817820443cfba02d1a6ecee053832fff6fbfbe29c77150986
|
3 |
size 449471589
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3311
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a854cf81d57a7e4747d79eeee0e792b9b0db2dfcccddbeaecfbfa4a0ff53eef
|
3 |
size 3311
|