Training in progress, step 220000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23ad25468e4976ad356c489ff9de5e4c29c45d72b9ad898ab44604c1735d5242
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81504f00d1476d46795ad7661bb9dd3cce0e3b34e6dbb42a795318db4fca7d75
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee2707666e6c7a8def868f81ca47e73ce94f4a6c51cea81c3c277b3144ea3665
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fe453041a1ef69b9f76a6ad7eaaaabaa16df5e5897736ce1fd0389d49af5e8d
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10d49f5030c11ac17c769e132afcac76be5010c8f6689315786413bc5d6a1915
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:023e578a11b41b3489c2463eeb4e215ba05a1e3d40fd6a117618630fc634702a
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e92b0700bb181d8ec04bb405e4281e6135a07379cd92ad2d3e2e3a7f7d5ff4d
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c1c26bd3f13243d197acaedea2ad1ddc3602f0e2685b06d1fa05a341140be98
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 3.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2586,6 +2586,66 @@
|
|
2586 |
"learning_rate": 9.530522177949888e-05,
|
2587 |
"loss": 0.3326,
|
2588 |
"step": 215000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2589 |
}
|
2590 |
],
|
2591 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.74468085106383,
|
5 |
+
"global_step": 220000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2586 |
"learning_rate": 9.530522177949888e-05,
|
2587 |
"loss": 0.3326,
|
2588 |
"step": 215000
|
2589 |
+
},
|
2590 |
+
{
|
2591 |
+
"epoch": 3.67,
|
2592 |
+
"learning_rate": 9.509098552481625e-05,
|
2593 |
+
"loss": 0.3326,
|
2594 |
+
"step": 215500
|
2595 |
+
},
|
2596 |
+
{
|
2597 |
+
"epoch": 3.68,
|
2598 |
+
"learning_rate": 9.487617114738049e-05,
|
2599 |
+
"loss": 0.3326,
|
2600 |
+
"step": 216000
|
2601 |
+
},
|
2602 |
+
{
|
2603 |
+
"epoch": 3.69,
|
2604 |
+
"learning_rate": 9.466164001565799e-05,
|
2605 |
+
"loss": 0.333,
|
2606 |
+
"step": 216500
|
2607 |
+
},
|
2608 |
+
{
|
2609 |
+
"epoch": 3.69,
|
2610 |
+
"learning_rate": 9.444653440347767e-05,
|
2611 |
+
"loss": 0.3327,
|
2612 |
+
"step": 217000
|
2613 |
+
},
|
2614 |
+
{
|
2615 |
+
"epoch": 3.7,
|
2616 |
+
"learning_rate": 9.423128620983511e-05,
|
2617 |
+
"loss": 0.3324,
|
2618 |
+
"step": 217500
|
2619 |
+
},
|
2620 |
+
{
|
2621 |
+
"epoch": 3.71,
|
2622 |
+
"learning_rate": 9.401589755914307e-05,
|
2623 |
+
"loss": 0.3324,
|
2624 |
+
"step": 218000
|
2625 |
+
},
|
2626 |
+
{
|
2627 |
+
"epoch": 3.72,
|
2628 |
+
"learning_rate": 9.380037057720053e-05,
|
2629 |
+
"loss": 0.3321,
|
2630 |
+
"step": 218500
|
2631 |
+
},
|
2632 |
+
{
|
2633 |
+
"epoch": 3.73,
|
2634 |
+
"learning_rate": 9.358470739117187e-05,
|
2635 |
+
"loss": 0.3322,
|
2636 |
+
"step": 219000
|
2637 |
+
},
|
2638 |
+
{
|
2639 |
+
"epoch": 3.74,
|
2640 |
+
"learning_rate": 9.336891012956562e-05,
|
2641 |
+
"loss": 0.3321,
|
2642 |
+
"step": 219500
|
2643 |
+
},
|
2644 |
+
{
|
2645 |
+
"epoch": 3.74,
|
2646 |
+
"learning_rate": 9.31529809222136e-05,
|
2647 |
+
"loss": 0.3317,
|
2648 |
+
"step": 220000
|
2649 |
}
|
2650 |
],
|
2651 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81504f00d1476d46795ad7661bb9dd3cce0e3b34e6dbb42a795318db4fca7d75
|
3 |
size 201355195
|