Training in progress, step 220000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5059207bb8f7474338f46ad1796dad829da238389ac39c9635ee8acd02116b3
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16425812d9833a8d95dd48a3d0184c98f7dd2e9492241f873f5a9c40ae42e711
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbb3e9af62ed3a2fb4c21b50702e890077b229911fb66c25fe62db896cd6936e
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4ff3f7483f367b532a6c013e9bc7674cfefe270148b92273e1eb6f4b0a5602b
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:928424c3508969d829d7738ab145a1179c0a63e17f1b187a3210dde559b6b11f
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76453a1bfef9b5a2e590753690e7f9c0a75725420ccdcc9cd813f49cb07d76d3
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0731dd739865cc8a6e81a2a5c07511dd29f52f75cc4568cab5c57bfbd230f902
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7710cccc15b1f8a7c7c1d746163ba63db0690c041a48dce113664b73ba5009b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 3.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2586,6 +2586,66 @@
|
|
2586 |
"learning_rate": 9.53077968553459e-05,
|
2587 |
"loss": 0.3339,
|
2588 |
"step": 215000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2589 |
}
|
2590 |
],
|
2591 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.7446745134083965,
|
5 |
+
"global_step": 220000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2586 |
"learning_rate": 9.53077968553459e-05,
|
2587 |
"loss": 0.3339,
|
2588 |
"step": 215000
|
2589 |
+
},
|
2590 |
+
{
|
2591 |
+
"epoch": 3.67,
|
2592 |
+
"learning_rate": 9.509313291996582e-05,
|
2593 |
+
"loss": 0.3342,
|
2594 |
+
"step": 215500
|
2595 |
+
},
|
2596 |
+
{
|
2597 |
+
"epoch": 3.68,
|
2598 |
+
"learning_rate": 9.487832002145718e-05,
|
2599 |
+
"loss": 0.334,
|
2600 |
+
"step": 216000
|
2601 |
+
},
|
2602 |
+
{
|
2603 |
+
"epoch": 3.69,
|
2604 |
+
"learning_rate": 9.466336027993655e-05,
|
2605 |
+
"loss": 0.3339,
|
2606 |
+
"step": 216500
|
2607 |
+
},
|
2608 |
+
{
|
2609 |
+
"epoch": 3.69,
|
2610 |
+
"learning_rate": 9.44482558169698e-05,
|
2611 |
+
"loss": 0.3339,
|
2612 |
+
"step": 217000
|
2613 |
+
},
|
2614 |
+
{
|
2615 |
+
"epoch": 3.7,
|
2616 |
+
"learning_rate": 9.423300875555111e-05,
|
2617 |
+
"loss": 0.3339,
|
2618 |
+
"step": 217500
|
2619 |
+
},
|
2620 |
+
{
|
2621 |
+
"epoch": 3.71,
|
2622 |
+
"learning_rate": 9.401762122008212e-05,
|
2623 |
+
"loss": 0.3334,
|
2624 |
+
"step": 218000
|
2625 |
+
},
|
2626 |
+
{
|
2627 |
+
"epoch": 3.72,
|
2628 |
+
"learning_rate": 9.380209533635084e-05,
|
2629 |
+
"loss": 0.3341,
|
2630 |
+
"step": 218500
|
2631 |
+
},
|
2632 |
+
{
|
2633 |
+
"epoch": 3.73,
|
2634 |
+
"learning_rate": 9.35864332315107e-05,
|
2635 |
+
"loss": 0.3341,
|
2636 |
+
"step": 219000
|
2637 |
+
},
|
2638 |
+
{
|
2639 |
+
"epoch": 3.74,
|
2640 |
+
"learning_rate": 9.337063703405964e-05,
|
2641 |
+
"loss": 0.3338,
|
2642 |
+
"step": 219500
|
2643 |
+
},
|
2644 |
+
{
|
2645 |
+
"epoch": 3.74,
|
2646 |
+
"learning_rate": 9.315514086042207e-05,
|
2647 |
+
"loss": 0.3338,
|
2648 |
+
"step": 220000
|
2649 |
}
|
2650 |
],
|
2651 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16425812d9833a8d95dd48a3d0184c98f7dd2e9492241f873f5a9c40ae42e711
|
3 |
size 201355195
|