Training in progress, step 360000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44471d6e6546be5ac1a0d86dea95ba4d44ec44baa5148bbd72a7ea895ad69cfc
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d3b9e249ae21e66394d1e5adda08ca3c78e35cfc386e28fe333440be7a14450
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cd407f01a45e91c6a9d73f9ffcc5948c50f62ac1349333301934ceecd28bde2
|
3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10ca9bf76c7cf63afb390947106325fd549859bdbd17156e672be09fdd4b8f4d
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:458dbb378f2ef2f1049b9621261d1e352171f603268c570c20cc0831e3c801af
|
3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed71a40157ab7c8a370261156f0220243926bfa7450a89c6374ca93f070e4120
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a42c32ab3c49e09d799093ca137ee6e22777a2749e499367cd831d70ce83fb58
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 5.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2596,11 +2596,85 @@
|
|
2596 |
"eval_samples_per_second": 971.32,
|
2597 |
"eval_steps_per_second": 15.541,
|
2598 |
"step": 350000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2599 |
}
|
2600 |
],
|
2601 |
"max_steps": 1000000,
|
2602 |
"num_train_epochs": 16,
|
2603 |
-
"total_flos": 2.
|
2604 |
"trial_name": null,
|
2605 |
"trial_params": null
|
2606 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.497274268175363,
|
5 |
+
"global_step": 360000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2596 |
"eval_samples_per_second": 971.32,
|
2597 |
"eval_steps_per_second": 15.541,
|
2598 |
"step": 350000
|
2599 |
+
},
|
2600 |
+
{
|
2601 |
+
"epoch": 5.36,
|
2602 |
+
"learning_rate": 0.00011809236994438816,
|
2603 |
+
"loss": 0.2831,
|
2604 |
+
"step": 351000
|
2605 |
+
},
|
2606 |
+
{
|
2607 |
+
"epoch": 5.38,
|
2608 |
+
"learning_rate": 0.00011789795224815164,
|
2609 |
+
"loss": 0.2827,
|
2610 |
+
"step": 352000
|
2611 |
+
},
|
2612 |
+
{
|
2613 |
+
"epoch": 5.39,
|
2614 |
+
"learning_rate": 0.00011770312010598116,
|
2615 |
+
"loss": 0.282,
|
2616 |
+
"step": 353000
|
2617 |
+
},
|
2618 |
+
{
|
2619 |
+
"epoch": 5.41,
|
2620 |
+
"learning_rate": 0.00011750787564852973,
|
2621 |
+
"loss": 0.2822,
|
2622 |
+
"step": 354000
|
2623 |
+
},
|
2624 |
+
{
|
2625 |
+
"epoch": 5.42,
|
2626 |
+
"learning_rate": 0.00011731222101095955,
|
2627 |
+
"loss": 0.2825,
|
2628 |
+
"step": 355000
|
2629 |
+
},
|
2630 |
+
{
|
2631 |
+
"epoch": 5.42,
|
2632 |
+
"eval_runtime": 1.0697,
|
2633 |
+
"eval_samples_per_second": 934.885,
|
2634 |
+
"eval_steps_per_second": 14.958,
|
2635 |
+
"step": 355000
|
2636 |
+
},
|
2637 |
+
{
|
2638 |
+
"epoch": 5.44,
|
2639 |
+
"learning_rate": 0.00011711615833291833,
|
2640 |
+
"loss": 0.2822,
|
2641 |
+
"step": 356000
|
2642 |
+
},
|
2643 |
+
{
|
2644 |
+
"epoch": 5.45,
|
2645 |
+
"learning_rate": 0.0001169196897585161,
|
2646 |
+
"loss": 0.2824,
|
2647 |
+
"step": 357000
|
2648 |
+
},
|
2649 |
+
{
|
2650 |
+
"epoch": 5.47,
|
2651 |
+
"learning_rate": 0.00011672281743630175,
|
2652 |
+
"loss": 0.2818,
|
2653 |
+
"step": 358000
|
2654 |
+
},
|
2655 |
+
{
|
2656 |
+
"epoch": 5.48,
|
2657 |
+
"learning_rate": 0.0001165255435192394,
|
2658 |
+
"loss": 0.2815,
|
2659 |
+
"step": 359000
|
2660 |
+
},
|
2661 |
+
{
|
2662 |
+
"epoch": 5.5,
|
2663 |
+
"learning_rate": 0.00011632787016468506,
|
2664 |
+
"loss": 0.2819,
|
2665 |
+
"step": 360000
|
2666 |
+
},
|
2667 |
+
{
|
2668 |
+
"epoch": 5.5,
|
2669 |
+
"eval_runtime": 1.1008,
|
2670 |
+
"eval_samples_per_second": 908.433,
|
2671 |
+
"eval_steps_per_second": 14.535,
|
2672 |
+
"step": 360000
|
2673 |
}
|
2674 |
],
|
2675 |
"max_steps": 1000000,
|
2676 |
"num_train_epochs": 16,
|
2677 |
+
"total_flos": 2.5236061117517534e+22,
|
2678 |
"trial_name": null,
|
2679 |
"trial_params": null
|
2680 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d3b9e249ae21e66394d1e5adda08ca3c78e35cfc386e28fe333440be7a14450
|
3 |
size 449471589
|