Training in progress, step 225000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af1a79673a5b6d5cee3a3a09ba3527767f82d4eda46ac22ac39b18a2eb13953b
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4039cd75d98c8661603c3393b045a828da7ada49e3a4247ee249207549ea3f23
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07660aaed3828df44e1619608daf91ba946a7d05611095c1b2828014a3eed4c2
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e0d6650e3aa804347a6d020cbfcea97ea1dce169b5e9813935d7a9d91cc3e17
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a5c1361f3716c2354fee22856326fa37c7a5301115efa59c2a64b42ed30245c
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afb6c75154b8e32878b0db53a9b47ce57494208cfae906d711b4df50ce8d97b7
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61596a96592218d7e99e8d7b0e2760afa9d311b55e0a724bfa3a9e2796ce357c
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1d4287db9b8f098417d74a167fca464306830696cd8da7dd5e29de73dd9478a
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 3.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2646,6 +2646,66 @@
|
|
2646 |
"learning_rate": 9.315514086042207e-05,
|
2647 |
"loss": 0.3338,
|
2648 |
"step": 220000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2649 |
}
|
2650 |
],
|
2651 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.829780172083642,
|
5 |
+
"global_step": 225000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2646 |
"learning_rate": 9.315514086042207e-05,
|
2647 |
"loss": 0.3338,
|
2648 |
"step": 220000
|
2649 |
+
},
|
2650 |
+
{
|
2651 |
+
"epoch": 3.75,
|
2652 |
+
"learning_rate": 9.293994761279615e-05,
|
2653 |
+
"loss": 0.3336,
|
2654 |
+
"step": 220500
|
2655 |
+
},
|
2656 |
+
{
|
2657 |
+
"epoch": 3.76,
|
2658 |
+
"learning_rate": 9.272376268146326e-05,
|
2659 |
+
"loss": 0.3336,
|
2660 |
+
"step": 221000
|
2661 |
+
},
|
2662 |
+
{
|
2663 |
+
"epoch": 3.77,
|
2664 |
+
"learning_rate": 9.250745217172946e-05,
|
2665 |
+
"loss": 0.3336,
|
2666 |
+
"step": 221500
|
2667 |
+
},
|
2668 |
+
{
|
2669 |
+
"epoch": 3.78,
|
2670 |
+
"learning_rate": 9.229101821849224e-05,
|
2671 |
+
"loss": 0.3334,
|
2672 |
+
"step": 222000
|
2673 |
+
},
|
2674 |
+
{
|
2675 |
+
"epoch": 3.79,
|
2676 |
+
"learning_rate": 9.207446295786727e-05,
|
2677 |
+
"loss": 0.3335,
|
2678 |
+
"step": 222500
|
2679 |
+
},
|
2680 |
+
{
|
2681 |
+
"epoch": 3.8,
|
2682 |
+
"learning_rate": 9.185778852716756e-05,
|
2683 |
+
"loss": 0.3331,
|
2684 |
+
"step": 223000
|
2685 |
+
},
|
2686 |
+
{
|
2687 |
+
"epoch": 3.8,
|
2688 |
+
"learning_rate": 9.164099706488229e-05,
|
2689 |
+
"loss": 0.3333,
|
2690 |
+
"step": 223500
|
2691 |
+
},
|
2692 |
+
{
|
2693 |
+
"epoch": 3.81,
|
2694 |
+
"learning_rate": 9.14245246366035e-05,
|
2695 |
+
"loss": 0.3334,
|
2696 |
+
"step": 224000
|
2697 |
+
},
|
2698 |
+
{
|
2699 |
+
"epoch": 3.82,
|
2700 |
+
"learning_rate": 9.12075057545787e-05,
|
2701 |
+
"loss": 0.3332,
|
2702 |
+
"step": 224500
|
2703 |
+
},
|
2704 |
+
{
|
2705 |
+
"epoch": 3.83,
|
2706 |
+
"learning_rate": 9.099037625899677e-05,
|
2707 |
+
"loss": 0.3333,
|
2708 |
+
"step": 225000
|
2709 |
}
|
2710 |
],
|
2711 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4039cd75d98c8661603c3393b045a828da7ada49e3a4247ee249207549ea3f23
|
3 |
size 201355195
|