Training in progress, step 490000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abdc666b2f3669fde2f13b1cbce8537a9750ab5e917c8e2f654b514fc145c70c
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99bd6ef8a1b85dd6a22f6aedd2dc916de7e85d96497ce03a01c5ad35aba260ef
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5a73351b78231930c5e38a85e2db75ffb99765eca05e9e6dd123f382ddd4cb3
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7260623fa55e4f39900f0d796a360342ed8e000aa7fbed24d40632bf5f5532f3
|
3 |
+
size 14439
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccfe48daf5b331e0d6c664328074c7da11a0476f84c219e54335158a88175b91
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cff816a3de440d565f73bab1c06a61b794b87400c0cb82ffdc2d9ef43530b338
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74800ce917e328df8d2e651e5da6a2b131e41e32b116f92b00e5f62a5503f854
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3558,11 +3558,85 @@
|
|
3558 |
"eval_samples_per_second": 966.468,
|
3559 |
"eval_steps_per_second": 15.463,
|
3560 |
"step": 480000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3561 |
}
|
3562 |
],
|
3563 |
"max_steps": 1000000,
|
3564 |
"num_train_epochs": 16,
|
3565 |
-
"total_flos": 3.
|
3566 |
"trial_name": null,
|
3567 |
"trial_params": null
|
3568 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.482401087238689,
|
5 |
+
"global_step": 490000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3558 |
"eval_samples_per_second": 966.468,
|
3559 |
"eval_steps_per_second": 15.463,
|
3560 |
"step": 480000
|
3561 |
+
},
|
3562 |
+
{
|
3563 |
+
"epoch": 7.34,
|
3564 |
+
"learning_rate": 9.014947164477721e-05,
|
3565 |
+
"loss": 0.2663,
|
3566 |
+
"step": 481000
|
3567 |
+
},
|
3568 |
+
{
|
3569 |
+
"epoch": 7.36,
|
3570 |
+
"learning_rate": 8.992037695672967e-05,
|
3571 |
+
"loss": 0.267,
|
3572 |
+
"step": 482000
|
3573 |
+
},
|
3574 |
+
{
|
3575 |
+
"epoch": 7.38,
|
3576 |
+
"learning_rate": 8.969117378102912e-05,
|
3577 |
+
"loss": 0.2665,
|
3578 |
+
"step": 483000
|
3579 |
+
},
|
3580 |
+
{
|
3581 |
+
"epoch": 7.39,
|
3582 |
+
"learning_rate": 8.946186462420478e-05,
|
3583 |
+
"loss": 0.2662,
|
3584 |
+
"step": 484000
|
3585 |
+
},
|
3586 |
+
{
|
3587 |
+
"epoch": 7.41,
|
3588 |
+
"learning_rate": 8.923245199394482e-05,
|
3589 |
+
"loss": 0.2662,
|
3590 |
+
"step": 485000
|
3591 |
+
},
|
3592 |
+
{
|
3593 |
+
"epoch": 7.41,
|
3594 |
+
"eval_runtime": 1.0079,
|
3595 |
+
"eval_samples_per_second": 992.191,
|
3596 |
+
"eval_steps_per_second": 15.875,
|
3597 |
+
"step": 485000
|
3598 |
+
},
|
3599 |
+
{
|
3600 |
+
"epoch": 7.42,
|
3601 |
+
"learning_rate": 8.900293839906903e-05,
|
3602 |
+
"loss": 0.2664,
|
3603 |
+
"step": 486000
|
3604 |
+
},
|
3605 |
+
{
|
3606 |
+
"epoch": 7.44,
|
3607 |
+
"learning_rate": 8.87733263495013e-05,
|
3608 |
+
"loss": 0.2658,
|
3609 |
+
"step": 487000
|
3610 |
+
},
|
3611 |
+
{
|
3612 |
+
"epoch": 7.45,
|
3613 |
+
"learning_rate": 8.85436183562422e-05,
|
3614 |
+
"loss": 0.2659,
|
3615 |
+
"step": 488000
|
3616 |
+
},
|
3617 |
+
{
|
3618 |
+
"epoch": 7.47,
|
3619 |
+
"learning_rate": 8.83138169313416e-05,
|
3620 |
+
"loss": 0.2663,
|
3621 |
+
"step": 489000
|
3622 |
+
},
|
3623 |
+
{
|
3624 |
+
"epoch": 7.48,
|
3625 |
+
"learning_rate": 8.808392458787103e-05,
|
3626 |
+
"loss": 0.2656,
|
3627 |
+
"step": 490000
|
3628 |
+
},
|
3629 |
+
{
|
3630 |
+
"epoch": 7.48,
|
3631 |
+
"eval_runtime": 1.075,
|
3632 |
+
"eval_samples_per_second": 930.213,
|
3633 |
+
"eval_steps_per_second": 14.883,
|
3634 |
+
"step": 490000
|
3635 |
}
|
3636 |
],
|
3637 |
"max_steps": 1000000,
|
3638 |
"num_train_epochs": 16,
|
3639 |
+
"total_flos": 3.434908191009969e+22,
|
3640 |
"trial_name": null,
|
3641 |
"trial_params": null
|
3642 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99bd6ef8a1b85dd6a22f6aedd2dc916de7e85d96497ce03a01c5ad35aba260ef
|
3 |
size 449471589
|