Training in progress, step 610000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58bbdb63dd5f35ddeda47e9dc139795456694d0aba5e0383355b4522b67e77b3
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b37572e353c74d53a74256f6a8b831fe7ecaed9eb997d03f96697e3f38be10fb
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e969b0a1f38bb709ff184bddc11e3e1b7a366f1f525d658e15c5e0e9638178b2
|
3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e472d706bbb24da1ae339dbf5b5e166d6a3aff07e50beb6830b013698716675c
|
3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb9d01b196a085088b251b99e2302b0af5c57d846827fd2f433da2dd23ebd86a
|
3 |
+
size 14567
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f9539e61c7b8fd54fd055839d1a6ea3b2cd4ea6f97a1aef7d0a7c91c2429be6
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71de57969ec7a3201bd7bf6de3d6e1584c8439f398aacc61865ba691cc2653d6
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 9.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4446,11 +4446,85 @@
|
|
4446 |
"eval_samples_per_second": 928.316,
|
4447 |
"eval_steps_per_second": 14.853,
|
4448 |
"step": 600000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4449 |
}
|
4450 |
],
|
4451 |
"max_steps": 1000000,
|
4452 |
"num_train_epochs": 16,
|
4453 |
-
"total_flos": 4.
|
4454 |
"trial_name": null,
|
4455 |
"trial_params": null
|
4456 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.314825843297143,
|
5 |
+
"global_step": 610000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4446 |
"eval_samples_per_second": 928.316,
|
4447 |
"eval_steps_per_second": 14.853,
|
4448 |
"step": 600000
|
4449 |
+
},
|
4450 |
+
{
|
4451 |
+
"epoch": 9.18,
|
4452 |
+
"learning_rate": 6.259170789846017e-05,
|
4453 |
+
"loss": 0.2546,
|
4454 |
+
"step": 601000
|
4455 |
+
},
|
4456 |
+
{
|
4457 |
+
"epoch": 9.19,
|
4458 |
+
"learning_rate": 6.236759027106965e-05,
|
4459 |
+
"loss": 0.2542,
|
4460 |
+
"step": 602000
|
4461 |
+
},
|
4462 |
+
{
|
4463 |
+
"epoch": 9.21,
|
4464 |
+
"learning_rate": 6.214366546888694e-05,
|
4465 |
+
"loss": 0.2541,
|
4466 |
+
"step": 603000
|
4467 |
+
},
|
4468 |
+
{
|
4469 |
+
"epoch": 9.22,
|
4470 |
+
"learning_rate": 6.191993594071785e-05,
|
4471 |
+
"loss": 0.2541,
|
4472 |
+
"step": 604000
|
4473 |
+
},
|
4474 |
+
{
|
4475 |
+
"epoch": 9.24,
|
4476 |
+
"learning_rate": 6.169640413323262e-05,
|
4477 |
+
"loss": 0.254,
|
4478 |
+
"step": 605000
|
4479 |
+
},
|
4480 |
+
{
|
4481 |
+
"epoch": 9.24,
|
4482 |
+
"eval_runtime": 1.0913,
|
4483 |
+
"eval_samples_per_second": 916.334,
|
4484 |
+
"eval_steps_per_second": 14.661,
|
4485 |
+
"step": 605000
|
4486 |
+
},
|
4487 |
+
{
|
4488 |
+
"epoch": 9.25,
|
4489 |
+
"learning_rate": 6.147307249093929e-05,
|
4490 |
+
"loss": 0.2537,
|
4491 |
+
"step": 606000
|
4492 |
+
},
|
4493 |
+
{
|
4494 |
+
"epoch": 9.27,
|
4495 |
+
"learning_rate": 6.124994345615693e-05,
|
4496 |
+
"loss": 0.2532,
|
4497 |
+
"step": 607000
|
4498 |
+
},
|
4499 |
+
{
|
4500 |
+
"epoch": 9.28,
|
4501 |
+
"learning_rate": 6.102701946898891e-05,
|
4502 |
+
"loss": 0.2536,
|
4503 |
+
"step": 608000
|
4504 |
+
},
|
4505 |
+
{
|
4506 |
+
"epoch": 9.3,
|
4507 |
+
"learning_rate": 6.0804302967296225e-05,
|
4508 |
+
"loss": 0.2545,
|
4509 |
+
"step": 609000
|
4510 |
+
},
|
4511 |
+
{
|
4512 |
+
"epoch": 9.31,
|
4513 |
+
"learning_rate": 6.058179638667089e-05,
|
4514 |
+
"loss": 0.2536,
|
4515 |
+
"step": 610000
|
4516 |
+
},
|
4517 |
+
{
|
4518 |
+
"epoch": 9.31,
|
4519 |
+
"eval_runtime": 1.0284,
|
4520 |
+
"eval_samples_per_second": 972.365,
|
4521 |
+
"eval_steps_per_second": 15.558,
|
4522 |
+
"step": 610000
|
4523 |
}
|
4524 |
],
|
4525 |
"max_steps": 1000000,
|
4526 |
"num_train_epochs": 16,
|
4527 |
+
"total_flos": 4.276110009237837e+22,
|
4528 |
"trial_name": null,
|
4529 |
"trial_params": null
|
4530 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b37572e353c74d53a74256f6a8b831fe7ecaed9eb997d03f96697e3f38be10fb
|
3 |
size 449471589
|