Training in progress, step 19000, checkpoint
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +703 -3
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:24b1b3bb212bb8e7a89dfabaae6120a59b5faf8f122cf7ad539861771d8cb89a
 size 715030586
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:229e29c1490a5a19b0d4fabe6ed475185030744313f815cbb86ca74d5cd2c449
 size 1032262338
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9dde6c449de2177e60a94a9900fe6d5a14850cbd574c0318aea1700129a48c14
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ea9cf41a0d1b98e0760cde7b6c59ff69bea027f4012ae8adfed82ffa854f9831
 size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:accf3b70270e61a071af09840966e9ef1fc65fa1b993b5947a7d43d8b578c1b2
 size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5275cc5e18e1b6590412766faef669b7b593c775c1ba9bd63e9afe6463f5d8b8
 size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b73090e5ff4d77e40aae33305c58d2deda13e4f4510f1c076acf40a9f8a97bef
 size 1064
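
Every binary above is stored with Git LFS, so each "+1 -1" diff touches only the three-line pointer file that lives in git history (version, oid sha256, size): the sizes are unchanged between checkpoints and only the sha256 hash line is rewritten. A minimal sketch of reading such a pointer, assuming the repo is cloned without running git lfs pull so the pointer text is still on disk (read_lfs_pointer is a hypothetical helper, not part of this repo):

    # Parse a Git LFS pointer file like the ones shown above.
    def read_lfs_pointer(path: str) -> dict:
        fields = {}
        with open(path) as f:
            for line in f:
                if not line.strip():
                    continue
                # e.g. "oid sha256:24b1b3bb..." -> key "oid", value "sha256:24b1b3bb..."
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        return fields

    ptr = read_lfs_pointer("last-checkpoint/optimizer.pt")
    print(ptr["oid"], ptr["size"])  # sha256:24b1b3bb... 715030586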
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.03704949520062789,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 19000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12608,6 +12608,706 @@
       "learning_rate": 0.0004943110480558603,
       "loss": 17.5819,
       "step": 18000
+    },
+    {
+      "epoch": 0.035119021503332015,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004943077970404056,
+      "loss": 17.6622,
+      "step": 18010
+    },
+    {
+      "epoch": 0.035138521237648136,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004943045460249509,
+      "loss": 17.7021,
+      "step": 18020
+    },
+    {
+      "epoch": 0.03515802097196426,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004943012950094963,
+      "loss": 17.5496,
+      "step": 18030
+    },
+    {
+      "epoch": 0.03517752070628038,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004942980439940416,
+      "loss": 17.6953,
+      "step": 18040
+    },
+    {
+      "epoch": 0.0351970204405965,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004942947929785869,
+      "loss": 17.5717,
+      "step": 18050
+    },
+    {
+      "epoch": 0.03521652017491262,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004942915419631321,
+      "loss": 17.5987,
+      "step": 18060
+    },
+    {
+      "epoch": 0.035236019909228734,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004942882909476775,
+      "loss": 17.4626,
+      "step": 18070
+    },
+    {
+      "epoch": 0.035255519643544855,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004942850399322228,
+      "loss": 17.6371,
+      "step": 18080
+    },
+    {
+      "epoch": 0.035275019377860976,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004942817889167681,
+      "loss": 17.6286,
+      "step": 18090
+    },
+    {
+      "epoch": 0.0352945191121771,
+      "grad_norm": 7.375,
+      "learning_rate": 0.0004942785379013135,
+      "loss": 17.4806,
+      "step": 18100
+    },
+    {
+      "epoch": 0.03531401884649322,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004942752868858588,
+      "loss": 17.5438,
+      "step": 18110
+    },
+    {
+      "epoch": 0.03533351858080934,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004942720358704041,
+      "loss": 17.6634,
+      "step": 18120
+    },
+    {
+      "epoch": 0.03535301831512545,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004942687848549494,
+      "loss": 17.5197,
+      "step": 18130
+    },
+    {
+      "epoch": 0.035372518049441574,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004942655338394948,
+      "loss": 17.6001,
+      "step": 18140
+    },
+    {
+      "epoch": 0.035392017783757695,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004942622828240401,
+      "loss": 17.6117,
+      "step": 18150
+    },
+    {
+      "epoch": 0.035411517518073816,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004942590318085854,
+      "loss": 17.4802,
+      "step": 18160
+    },
+    {
+      "epoch": 0.03543101725238994,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004942557807931308,
+      "loss": 17.6808,
+      "step": 18170
+    },
+    {
+      "epoch": 0.03545051698670606,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004942525297776761,
+      "loss": 17.5905,
+      "step": 18180
+    },
+    {
+      "epoch": 0.03547001672102218,
+      "grad_norm": 11.3125,
+      "learning_rate": 0.0004942492787622214,
+      "loss": 17.5809,
+      "step": 18190
+    },
+    {
+      "epoch": 0.035489516455338294,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004942460277467667,
+      "loss": 17.7091,
+      "step": 18200
+    },
+    {
+      "epoch": 0.035509016189654415,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004942427767313121,
+      "loss": 17.5444,
+      "step": 18210
+    },
+    {
+      "epoch": 0.035528515923970536,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004942395257158573,
+      "loss": 17.5533,
+      "step": 18220
+    },
+    {
+      "epoch": 0.03554801565828666,
+      "grad_norm": 7.75,
+      "learning_rate": 0.0004942362747004026,
+      "loss": 17.6227,
+      "step": 18230
+    },
+    {
+      "epoch": 0.03556751539260278,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.000494233023684948,
+      "loss": 17.5303,
+      "step": 18240
+    },
+    {
+      "epoch": 0.0355870151269189,
+      "grad_norm": 7.84375,
+      "learning_rate": 0.0004942297726694933,
+      "loss": 17.5429,
+      "step": 18250
+    },
+    {
+      "epoch": 0.03560651486123501,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.0004942265216540386,
+      "loss": 17.6151,
+      "step": 18260
+    },
+    {
+      "epoch": 0.035626014595551134,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004942232706385839,
+      "loss": 17.42,
+      "step": 18270
+    },
+    {
+      "epoch": 0.035645514329867255,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004942200196231293,
+      "loss": 17.436,
+      "step": 18280
+    },
+    {
+      "epoch": 0.035665014064183376,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004942167686076746,
+      "loss": 17.5189,
+      "step": 18290
+    },
+    {
+      "epoch": 0.0356845137984995,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004942135175922199,
+      "loss": 17.5484,
+      "step": 18300
+    },
+    {
+      "epoch": 0.03570401353281562,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004942102665767652,
+      "loss": 17.6905,
+      "step": 18310
+    },
+    {
+      "epoch": 0.03572351326713174,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004942070155613106,
+      "loss": 17.677,
+      "step": 18320
+    },
+    {
+      "epoch": 0.03574301300144785,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.0004942037645458559,
+      "loss": 17.5109,
+      "step": 18330
+    },
+    {
+      "epoch": 0.035762512735763974,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004942005135304012,
+      "loss": 17.52,
+      "step": 18340
+    },
+    {
+      "epoch": 0.035782012470080095,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004941972625149466,
+      "loss": 17.6037,
+      "step": 18350
+    },
+    {
+      "epoch": 0.035801512204396216,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004941940114994919,
+      "loss": 17.5273,
+      "step": 18360
+    },
+    {
+      "epoch": 0.03582101193871234,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004941907604840372,
+      "loss": 17.4488,
+      "step": 18370
+    },
+    {
+      "epoch": 0.03584051167302846,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004941875094685825,
+      "loss": 17.5285,
+      "step": 18380
+    },
+    {
+      "epoch": 0.03586001140734457,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004941842584531279,
+      "loss": 17.5049,
+      "step": 18390
+    },
+    {
+      "epoch": 0.03587951114166069,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004941810074376732,
+      "loss": 17.5321,
+      "step": 18400
+    },
+    {
+      "epoch": 0.035899010875976814,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004941777564222185,
+      "loss": 17.5864,
+      "step": 18410
+    },
+    {
+      "epoch": 0.035918510610292935,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004941745054067638,
+      "loss": 17.4779,
+      "step": 18420
+    },
+    {
+      "epoch": 0.035938010344609056,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004941712543913091,
+      "loss": 17.4962,
+      "step": 18430
+    },
+    {
+      "epoch": 0.03595751007892518,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004941680033758544,
+      "loss": 17.449,
+      "step": 18440
+    },
+    {
+      "epoch": 0.0359770098132413,
+      "grad_norm": 7.84375,
+      "learning_rate": 0.0004941647523603997,
+      "loss": 17.5084,
+      "step": 18450
+    },
+    {
+      "epoch": 0.03599650954755741,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004941615013449451,
+      "loss": 17.5459,
+      "step": 18460
+    },
+    {
+      "epoch": 0.03601600928187353,
+      "grad_norm": 7.875,
+      "learning_rate": 0.0004941582503294904,
+      "loss": 17.5541,
+      "step": 18470
+    },
+    {
+      "epoch": 0.036035509016189654,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004941549993140357,
+      "loss": 17.635,
+      "step": 18480
+    },
+    {
+      "epoch": 0.036055008750505775,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.000494151748298581,
+      "loss": 17.5686,
+      "step": 18490
+    },
+    {
+      "epoch": 0.036074508484821896,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004941484972831264,
+      "loss": 17.6741,
+      "step": 18500
+    },
+    {
+      "epoch": 0.03609400821913802,
+      "grad_norm": 7.84375,
+      "learning_rate": 0.0004941452462676717,
+      "loss": 17.6735,
+      "step": 18510
+    },
+    {
+      "epoch": 0.03611350795345413,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.000494141995252217,
+      "loss": 17.5192,
+      "step": 18520
+    },
+    {
+      "epoch": 0.03613300768777025,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004941387442367624,
+      "loss": 17.3641,
+      "step": 18530
+    },
+    {
+      "epoch": 0.03615250742208637,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004941354932213077,
+      "loss": 17.586,
+      "step": 18540
+    },
+    {
+      "epoch": 0.036172007156402494,
+      "grad_norm": 8.875,
+      "learning_rate": 0.000494132242205853,
+      "loss": 17.5065,
+      "step": 18550
+    },
+    {
+      "epoch": 0.036191506890718615,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004941289911903983,
+      "loss": 17.4222,
+      "step": 18560
+    },
+    {
+      "epoch": 0.036211006625034736,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004941257401749437,
+      "loss": 17.513,
+      "step": 18570
+    },
+    {
+      "epoch": 0.03623050635935086,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.000494122489159489,
+      "loss": 17.5073,
+      "step": 18580
+    },
+    {
+      "epoch": 0.03625000609366697,
+      "grad_norm": 11.875,
+      "learning_rate": 0.0004941192381440343,
+      "loss": 17.3283,
+      "step": 18590
+    },
+    {
+      "epoch": 0.03626950582798309,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004941159871285797,
+      "loss": 17.4716,
+      "step": 18600
+    },
+    {
+      "epoch": 0.03628900556229921,
+      "grad_norm": 8.5,
+      "learning_rate": 0.000494112736113125,
+      "loss": 17.372,
+      "step": 18610
+    },
+    {
+      "epoch": 0.036308505296615334,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004941094850976703,
+      "loss": 17.4356,
+      "step": 18620
+    },
+    {
+      "epoch": 0.036328005030931455,
+      "grad_norm": 8.0,
+      "learning_rate": 0.0004941062340822156,
+      "loss": 17.5272,
+      "step": 18630
+    },
+    {
+      "epoch": 0.036347504765247576,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.000494102983066761,
+      "loss": 17.5673,
+      "step": 18640
+    },
+    {
+      "epoch": 0.03636700449956369,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004940997320513063,
+      "loss": 17.5017,
+      "step": 18650
+    },
+    {
+      "epoch": 0.03638650423387981,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004940964810358516,
+      "loss": 17.4877,
+      "step": 18660
+    },
+    {
+      "epoch": 0.03640600396819593,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004940932300203969,
+      "loss": 17.4823,
+      "step": 18670
+    },
+    {
+      "epoch": 0.03642550370251205,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004940899790049422,
+      "loss": 17.4745,
+      "step": 18680
+    },
+    {
+      "epoch": 0.036445003436828174,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004940867279894875,
+      "loss": 17.6121,
+      "step": 18690
+    },
+    {
+      "epoch": 0.036464503171144295,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004940834769740328,
+      "loss": 17.5465,
+      "step": 18700
+    },
+    {
+      "epoch": 0.036484002905460416,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.0004940802259585782,
+      "loss": 17.5631,
+      "step": 18710
+    },
+    {
+      "epoch": 0.03650350263977653,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004940769749431235,
+      "loss": 17.4597,
+      "step": 18720
+    },
+    {
+      "epoch": 0.03652300237409265,
+      "grad_norm": 13.4375,
+      "learning_rate": 0.0004940737239276688,
+      "loss": 17.4941,
+      "step": 18730
+    },
+    {
+      "epoch": 0.03654250210840877,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.0004940704729122142,
+      "loss": 17.5893,
+      "step": 18740
+    },
+    {
+      "epoch": 0.036562001842724894,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004940672218967595,
+      "loss": 17.5582,
+      "step": 18750
+    },
+    {
+      "epoch": 0.036581501577041015,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004940639708813048,
+      "loss": 17.51,
+      "step": 18760
+    },
+    {
+      "epoch": 0.036601001311357136,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004940607198658501,
+      "loss": 17.4947,
+      "step": 18770
+    },
+    {
+      "epoch": 0.03662050104567325,
+      "grad_norm": 8.0,
+      "learning_rate": 0.0004940574688503955,
+      "loss": 17.3291,
+      "step": 18780
+    },
+    {
+      "epoch": 0.03664000077998937,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004940542178349408,
+      "loss": 17.4147,
+      "step": 18790
+    },
+    {
+      "epoch": 0.03665950051430549,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004940509668194861,
+      "loss": 17.4158,
+      "step": 18800
+    },
+    {
+      "epoch": 0.03667900024862161,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004940477158040315,
+      "loss": 17.3498,
+      "step": 18810
+    },
+    {
+      "epoch": 0.036698499982937734,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004940444647885768,
+      "loss": 17.5235,
+      "step": 18820
+    },
+    {
+      "epoch": 0.036717999717253855,
+      "grad_norm": 8.125,
+      "learning_rate": 0.000494041213773122,
+      "loss": 17.5074,
+      "step": 18830
+    },
+    {
+      "epoch": 0.036737499451569976,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004940379627576673,
+      "loss": 17.5261,
+      "step": 18840
+    },
+    {
+      "epoch": 0.03675699918588609,
+      "grad_norm": 26.625,
+      "learning_rate": 0.0004940347117422127,
+      "loss": 17.4652,
+      "step": 18850
+    },
+    {
+      "epoch": 0.03677649892020221,
+      "grad_norm": 8.75,
+      "learning_rate": 0.000494031460726758,
+      "loss": 17.5258,
+      "step": 18860
+    },
+    {
+      "epoch": 0.03679599865451833,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004940282097113033,
+      "loss": 17.487,
+      "step": 18870
+    },
+    {
+      "epoch": 0.03681549838883445,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004940249586958486,
+      "loss": 17.4624,
+      "step": 18880
+    },
+    {
+      "epoch": 0.036834998123150574,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.000494021707680394,
+      "loss": 17.4346,
+      "step": 18890
+    },
+    {
+      "epoch": 0.036854497857466695,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004940184566649393,
+      "loss": 17.4845,
+      "step": 18900
+    },
+    {
+      "epoch": 0.03687399759178281,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004940152056494846,
+      "loss": 17.4851,
+      "step": 18910
+    },
+    {
+      "epoch": 0.03689349732609893,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.00049401195463403,
+      "loss": 17.423,
+      "step": 18920
+    },
+    {
+      "epoch": 0.03691299706041505,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004940087036185753,
+      "loss": 17.5391,
+      "step": 18930
+    },
+    {
+      "epoch": 0.03693249679473117,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004940054526031206,
+      "loss": 17.3817,
+      "step": 18940
+    },
+    {
+      "epoch": 0.03695199652904729,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004940022015876659,
+      "loss": 17.3288,
+      "step": 18950
+    },
+    {
+      "epoch": 0.036971496263363414,
+      "grad_norm": 7.625,
+      "learning_rate": 0.0004939989505722113,
+      "loss": 17.4329,
+      "step": 18960
+    },
+    {
+      "epoch": 0.036990995997679535,
+      "grad_norm": 7.625,
+      "learning_rate": 0.0004939956995567566,
+      "loss": 17.4302,
+      "step": 18970
+    },
+    {
+      "epoch": 0.03701049573199565,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.0004939924485413019,
+      "loss": 17.3948,
+      "step": 18980
+    },
+    {
+      "epoch": 0.03702999546631177,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004939891975258471,
+      "loss": 17.4144,
+      "step": 18990
+    },
+    {
+      "epoch": 0.03704949520062789,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004939859465103925,
+      "loss": 17.4472,
+      "step": 19000
     }
   ],
   "logging_steps": 10,
@@ -12627,7 +13327,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 4.105398039319098e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null
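
The 700 added lines in trainer_state.json are simply the next hundred log_history records, one every 10 optimizer steps from 18010 through 19000. A minimal sketch of inspecting them once the checkpoint is pulled locally (standard-library json only; the file path is as in this repo):

    import json

    # Summarize the log_history records added by this checkpoint.
    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    logs = [e for e in state["log_history"] if 18000 < e.get("step", 0) <= 19000]
    losses = [e["loss"] for e in logs]
    print(f"global_step={state['global_step']}  epoch={state['epoch']:.6f}")
    print(f"{len(logs)} new records; loss min={min(losses):.4f} "
          f"max={max(losses):.4f} mean={sum(losses)/len(losses):.4f}")
    # The header fields also give the implied epoch length:
    print(f"~{state['global_step'] / state['epoch']:,.0f} optimizer steps per epoch")

Over this window the loss drifts from about 17.58 at step 18000 to 17.45 at step 19000 while the learning rate decays only in its seventh decimal place, and global_step / epoch works out to roughly 513,000 optimizer steps per epoch. The directory layout is also what transformers' Trainer.train(resume_from_checkpoint="last-checkpoint") expects when continuing the run: model weights, optimizer and scheduler state, and one rng_state_*.pth per data-parallel process (four here).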