Training in progress, step 122000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +703 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 715030586
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbeaafd6ccfc1a71df631284d94737e176690a5d53963ee816742d55c66f65c3
|
| 3 |
size 715030586
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1032262338
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d105f7bc65410e3f121dcaf59b93fed762c9a65fe7e4c8955d1292cb6a6876c9
|
| 3 |
size 1032262338
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9da9fa9d20ad8eaec174be663669ed0dd6272da27b984848d5af57376e63a91f
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fa62ee06d52a6750aacfff038383024cfa35b60c5b93fdacff2bca27d4639e6
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1810a8bef166e692355d67f304bf8cfd105103f952547985645833c2feea07b4
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e06c20da7b3de893663276090538aadc1b5a365c5cfce0a0140a2548043a2773
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60ff0e39fa2ada0903d0841edad35ce944a197d8f614422d9d9915f5101a0a12
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -84708,6 +84708,706 @@
|
|
| 84708 |
"learning_rate": 0.0004900826872074986,
|
| 84709 |
"loss": 14.854,
|
| 84710 |
"step": 121000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84711 |
}
|
| 84712 |
],
|
| 84713 |
"logging_steps": 10,
|
|
@@ -84727,7 +85427,7 @@
|
|
| 84727 |
"attributes": {}
|
| 84728 |
}
|
| 84729 |
},
|
| 84730 |
-
"total_flos": 2.
|
| 84731 |
"train_batch_size": 16,
|
| 84732 |
"trial_name": null,
|
| 84733 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.06024013098772089,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 122000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 84708 |
"learning_rate": 0.0004900826872074986,
|
| 84709 |
"loss": 14.854,
|
| 84710 |
"step": 121000
|
| 84711 |
+
},
|
| 84712 |
+
{
|
| 84713 |
+
"epoch": 0.0597512971379025,
|
| 84714 |
+
"grad_norm": 18.25,
|
| 84715 |
+
"learning_rate": 0.0004900818641871587,
|
| 84716 |
+
"loss": 14.6916,
|
| 84717 |
+
"step": 121010
|
| 84718 |
+
},
|
| 84719 |
+
{
|
| 84720 |
+
"epoch": 0.05975623485355723,
|
| 84721 |
+
"grad_norm": 9.6875,
|
| 84722 |
+
"learning_rate": 0.000490081041166819,
|
| 84723 |
+
"loss": 14.7784,
|
| 84724 |
+
"step": 121020
|
| 84725 |
+
},
|
| 84726 |
+
{
|
| 84727 |
+
"epoch": 0.059761172569211964,
|
| 84728 |
+
"grad_norm": 14.3125,
|
| 84729 |
+
"learning_rate": 0.0004900802181464792,
|
| 84730 |
+
"loss": 14.6165,
|
| 84731 |
+
"step": 121030
|
| 84732 |
+
},
|
| 84733 |
+
{
|
| 84734 |
+
"epoch": 0.05976611028486669,
|
| 84735 |
+
"grad_norm": 12.75,
|
| 84736 |
+
"learning_rate": 0.0004900793951261394,
|
| 84737 |
+
"loss": 14.8089,
|
| 84738 |
+
"step": 121040
|
| 84739 |
+
},
|
| 84740 |
+
{
|
| 84741 |
+
"epoch": 0.05977104800052142,
|
| 84742 |
+
"grad_norm": 13.6875,
|
| 84743 |
+
"learning_rate": 0.0004900785721057996,
|
| 84744 |
+
"loss": 14.64,
|
| 84745 |
+
"step": 121050
|
| 84746 |
+
},
|
| 84747 |
+
{
|
| 84748 |
+
"epoch": 0.05977598571617616,
|
| 84749 |
+
"grad_norm": 7.84375,
|
| 84750 |
+
"learning_rate": 0.0004900777490854598,
|
| 84751 |
+
"loss": 14.524,
|
| 84752 |
+
"step": 121060
|
| 84753 |
+
},
|
| 84754 |
+
{
|
| 84755 |
+
"epoch": 0.05978092343183088,
|
| 84756 |
+
"grad_norm": 8.1875,
|
| 84757 |
+
"learning_rate": 0.00049007692606512,
|
| 84758 |
+
"loss": 14.5122,
|
| 84759 |
+
"step": 121070
|
| 84760 |
+
},
|
| 84761 |
+
{
|
| 84762 |
+
"epoch": 0.05978586114748562,
|
| 84763 |
+
"grad_norm": 8.5625,
|
| 84764 |
+
"learning_rate": 0.0004900761030447803,
|
| 84765 |
+
"loss": 14.4947,
|
| 84766 |
+
"step": 121080
|
| 84767 |
+
},
|
| 84768 |
+
{
|
| 84769 |
+
"epoch": 0.05979079886314035,
|
| 84770 |
+
"grad_norm": 9.4375,
|
| 84771 |
+
"learning_rate": 0.0004900752800244405,
|
| 84772 |
+
"loss": 14.5841,
|
| 84773 |
+
"step": 121090
|
| 84774 |
+
},
|
| 84775 |
+
{
|
| 84776 |
+
"epoch": 0.059795736578795076,
|
| 84777 |
+
"grad_norm": 9.1875,
|
| 84778 |
+
"learning_rate": 0.0004900744570041006,
|
| 84779 |
+
"loss": 14.5958,
|
| 84780 |
+
"step": 121100
|
| 84781 |
+
},
|
| 84782 |
+
{
|
| 84783 |
+
"epoch": 0.05980067429444981,
|
| 84784 |
+
"grad_norm": 8.8125,
|
| 84785 |
+
"learning_rate": 0.0004900736339837608,
|
| 84786 |
+
"loss": 14.6845,
|
| 84787 |
+
"step": 121110
|
| 84788 |
+
},
|
| 84789 |
+
{
|
| 84790 |
+
"epoch": 0.05980561201010454,
|
| 84791 |
+
"grad_norm": 8.625,
|
| 84792 |
+
"learning_rate": 0.000490072810963421,
|
| 84793 |
+
"loss": 14.6689,
|
| 84794 |
+
"step": 121120
|
| 84795 |
+
},
|
| 84796 |
+
{
|
| 84797 |
+
"epoch": 0.05981054972575927,
|
| 84798 |
+
"grad_norm": 12.375,
|
| 84799 |
+
"learning_rate": 0.0004900719879430812,
|
| 84800 |
+
"loss": 14.6507,
|
| 84801 |
+
"step": 121130
|
| 84802 |
+
},
|
| 84803 |
+
{
|
| 84804 |
+
"epoch": 0.059815487441414,
|
| 84805 |
+
"grad_norm": 9.625,
|
| 84806 |
+
"learning_rate": 0.0004900711649227415,
|
| 84807 |
+
"loss": 14.702,
|
| 84808 |
+
"step": 121140
|
| 84809 |
+
},
|
| 84810 |
+
{
|
| 84811 |
+
"epoch": 0.059820425157068736,
|
| 84812 |
+
"grad_norm": 8.9375,
|
| 84813 |
+
"learning_rate": 0.0004900703419024016,
|
| 84814 |
+
"loss": 14.3188,
|
| 84815 |
+
"step": 121150
|
| 84816 |
+
},
|
| 84817 |
+
{
|
| 84818 |
+
"epoch": 0.05982536287272346,
|
| 84819 |
+
"grad_norm": 9.375,
|
| 84820 |
+
"learning_rate": 0.0004900695188820619,
|
| 84821 |
+
"loss": 14.7093,
|
| 84822 |
+
"step": 121160
|
| 84823 |
+
},
|
| 84824 |
+
{
|
| 84825 |
+
"epoch": 0.059830300588378196,
|
| 84826 |
+
"grad_norm": 9.0625,
|
| 84827 |
+
"learning_rate": 0.0004900686958617221,
|
| 84828 |
+
"loss": 14.7201,
|
| 84829 |
+
"step": 121170
|
| 84830 |
+
},
|
| 84831 |
+
{
|
| 84832 |
+
"epoch": 0.05983523830403293,
|
| 84833 |
+
"grad_norm": 9.4375,
|
| 84834 |
+
"learning_rate": 0.0004900678728413822,
|
| 84835 |
+
"loss": 14.575,
|
| 84836 |
+
"step": 121180
|
| 84837 |
+
},
|
| 84838 |
+
{
|
| 84839 |
+
"epoch": 0.05984017601968766,
|
| 84840 |
+
"grad_norm": 10.375,
|
| 84841 |
+
"learning_rate": 0.0004900670498210424,
|
| 84842 |
+
"loss": 14.666,
|
| 84843 |
+
"step": 121190
|
| 84844 |
+
},
|
| 84845 |
+
{
|
| 84846 |
+
"epoch": 0.05984511373534239,
|
| 84847 |
+
"grad_norm": 9.375,
|
| 84848 |
+
"learning_rate": 0.0004900662268007027,
|
| 84849 |
+
"loss": 14.6933,
|
| 84850 |
+
"step": 121200
|
| 84851 |
+
},
|
| 84852 |
+
{
|
| 84853 |
+
"epoch": 0.05985005145099712,
|
| 84854 |
+
"grad_norm": 8.8125,
|
| 84855 |
+
"learning_rate": 0.0004900654037803628,
|
| 84856 |
+
"loss": 14.8033,
|
| 84857 |
+
"step": 121210
|
| 84858 |
+
},
|
| 84859 |
+
{
|
| 84860 |
+
"epoch": 0.059854989166651856,
|
| 84861 |
+
"grad_norm": 8.625,
|
| 84862 |
+
"learning_rate": 0.0004900645807600231,
|
| 84863 |
+
"loss": 14.4617,
|
| 84864 |
+
"step": 121220
|
| 84865 |
+
},
|
| 84866 |
+
{
|
| 84867 |
+
"epoch": 0.05985992688230658,
|
| 84868 |
+
"grad_norm": 9.625,
|
| 84869 |
+
"learning_rate": 0.0004900637577396833,
|
| 84870 |
+
"loss": 14.7749,
|
| 84871 |
+
"step": 121230
|
| 84872 |
+
},
|
| 84873 |
+
{
|
| 84874 |
+
"epoch": 0.059864864597961316,
|
| 84875 |
+
"grad_norm": 9.125,
|
| 84876 |
+
"learning_rate": 0.0004900629347193435,
|
| 84877 |
+
"loss": 14.6859,
|
| 84878 |
+
"step": 121240
|
| 84879 |
+
},
|
| 84880 |
+
{
|
| 84881 |
+
"epoch": 0.05986980231361605,
|
| 84882 |
+
"grad_norm": 9.3125,
|
| 84883 |
+
"learning_rate": 0.0004900621116990038,
|
| 84884 |
+
"loss": 14.5459,
|
| 84885 |
+
"step": 121250
|
| 84886 |
+
},
|
| 84887 |
+
{
|
| 84888 |
+
"epoch": 0.059874740029270776,
|
| 84889 |
+
"grad_norm": 10.9375,
|
| 84890 |
+
"learning_rate": 0.000490061288678664,
|
| 84891 |
+
"loss": 14.7551,
|
| 84892 |
+
"step": 121260
|
| 84893 |
+
},
|
| 84894 |
+
{
|
| 84895 |
+
"epoch": 0.05987967774492551,
|
| 84896 |
+
"grad_norm": 7.8125,
|
| 84897 |
+
"learning_rate": 0.0004900604656583241,
|
| 84898 |
+
"loss": 14.719,
|
| 84899 |
+
"step": 121270
|
| 84900 |
+
},
|
| 84901 |
+
{
|
| 84902 |
+
"epoch": 0.05988461546058024,
|
| 84903 |
+
"grad_norm": 8.75,
|
| 84904 |
+
"learning_rate": 0.0004900596426379843,
|
| 84905 |
+
"loss": 14.6706,
|
| 84906 |
+
"step": 121280
|
| 84907 |
+
},
|
| 84908 |
+
{
|
| 84909 |
+
"epoch": 0.05988955317623497,
|
| 84910 |
+
"grad_norm": 9.6875,
|
| 84911 |
+
"learning_rate": 0.0004900588196176445,
|
| 84912 |
+
"loss": 14.6011,
|
| 84913 |
+
"step": 121290
|
| 84914 |
+
},
|
| 84915 |
+
{
|
| 84916 |
+
"epoch": 0.0598944908918897,
|
| 84917 |
+
"grad_norm": 11.25,
|
| 84918 |
+
"learning_rate": 0.0004900579965973047,
|
| 84919 |
+
"loss": 14.5435,
|
| 84920 |
+
"step": 121300
|
| 84921 |
+
},
|
| 84922 |
+
{
|
| 84923 |
+
"epoch": 0.059899428607544436,
|
| 84924 |
+
"grad_norm": 125.0,
|
| 84925 |
+
"learning_rate": 0.000490057173576965,
|
| 84926 |
+
"loss": 14.7466,
|
| 84927 |
+
"step": 121310
|
| 84928 |
+
},
|
| 84929 |
+
{
|
| 84930 |
+
"epoch": 0.05990436632319916,
|
| 84931 |
+
"grad_norm": 10.5625,
|
| 84932 |
+
"learning_rate": 0.0004900563505566251,
|
| 84933 |
+
"loss": 14.6536,
|
| 84934 |
+
"step": 121320
|
| 84935 |
+
},
|
| 84936 |
+
{
|
| 84937 |
+
"epoch": 0.059909304038853896,
|
| 84938 |
+
"grad_norm": 11.6875,
|
| 84939 |
+
"learning_rate": 0.0004900555275362854,
|
| 84940 |
+
"loss": 14.7485,
|
| 84941 |
+
"step": 121330
|
| 84942 |
+
},
|
| 84943 |
+
{
|
| 84944 |
+
"epoch": 0.05991424175450863,
|
| 84945 |
+
"grad_norm": 15.6875,
|
| 84946 |
+
"learning_rate": 0.0004900547045159455,
|
| 84947 |
+
"loss": 14.7255,
|
| 84948 |
+
"step": 121340
|
| 84949 |
+
},
|
| 84950 |
+
{
|
| 84951 |
+
"epoch": 0.05991917947016336,
|
| 84952 |
+
"grad_norm": 8.5,
|
| 84953 |
+
"learning_rate": 0.0004900538814956057,
|
| 84954 |
+
"loss": 14.6954,
|
| 84955 |
+
"step": 121350
|
| 84956 |
+
},
|
| 84957 |
+
{
|
| 84958 |
+
"epoch": 0.05992411718581809,
|
| 84959 |
+
"grad_norm": 8.625,
|
| 84960 |
+
"learning_rate": 0.0004900530584752659,
|
| 84961 |
+
"loss": 14.7095,
|
| 84962 |
+
"step": 121360
|
| 84963 |
+
},
|
| 84964 |
+
{
|
| 84965 |
+
"epoch": 0.05992905490147282,
|
| 84966 |
+
"grad_norm": 8.875,
|
| 84967 |
+
"learning_rate": 0.0004900522354549262,
|
| 84968 |
+
"loss": 14.6034,
|
| 84969 |
+
"step": 121370
|
| 84970 |
+
},
|
| 84971 |
+
{
|
| 84972 |
+
"epoch": 0.059933992617127556,
|
| 84973 |
+
"grad_norm": 14.3125,
|
| 84974 |
+
"learning_rate": 0.0004900514124345863,
|
| 84975 |
+
"loss": 14.5974,
|
| 84976 |
+
"step": 121380
|
| 84977 |
+
},
|
| 84978 |
+
{
|
| 84979 |
+
"epoch": 0.05993893033278228,
|
| 84980 |
+
"grad_norm": 19.875,
|
| 84981 |
+
"learning_rate": 0.0004900505894142466,
|
| 84982 |
+
"loss": 14.6913,
|
| 84983 |
+
"step": 121390
|
| 84984 |
+
},
|
| 84985 |
+
{
|
| 84986 |
+
"epoch": 0.059943868048437016,
|
| 84987 |
+
"grad_norm": 34.5,
|
| 84988 |
+
"learning_rate": 0.0004900497663939068,
|
| 84989 |
+
"loss": 14.6392,
|
| 84990 |
+
"step": 121400
|
| 84991 |
+
},
|
| 84992 |
+
{
|
| 84993 |
+
"epoch": 0.05994880576409175,
|
| 84994 |
+
"grad_norm": 9.3125,
|
| 84995 |
+
"learning_rate": 0.000490048943373567,
|
| 84996 |
+
"loss": 14.5991,
|
| 84997 |
+
"step": 121410
|
| 84998 |
+
},
|
| 84999 |
+
{
|
| 85000 |
+
"epoch": 0.059953743479746475,
|
| 85001 |
+
"grad_norm": 7.375,
|
| 85002 |
+
"learning_rate": 0.0004900481203532271,
|
| 85003 |
+
"loss": 14.5578,
|
| 85004 |
+
"step": 121420
|
| 85005 |
+
},
|
| 85006 |
+
{
|
| 85007 |
+
"epoch": 0.05995868119540121,
|
| 85008 |
+
"grad_norm": 10.0625,
|
| 85009 |
+
"learning_rate": 0.0004900472973328873,
|
| 85010 |
+
"loss": 14.6561,
|
| 85011 |
+
"step": 121430
|
| 85012 |
+
},
|
| 85013 |
+
{
|
| 85014 |
+
"epoch": 0.05996361891105594,
|
| 85015 |
+
"grad_norm": 79.5,
|
| 85016 |
+
"learning_rate": 0.0004900464743125475,
|
| 85017 |
+
"loss": 14.7054,
|
| 85018 |
+
"step": 121440
|
| 85019 |
+
},
|
| 85020 |
+
{
|
| 85021 |
+
"epoch": 0.05996855662671067,
|
| 85022 |
+
"grad_norm": 8.4375,
|
| 85023 |
+
"learning_rate": 0.0004900456512922078,
|
| 85024 |
+
"loss": 14.6515,
|
| 85025 |
+
"step": 121450
|
| 85026 |
+
},
|
| 85027 |
+
{
|
| 85028 |
+
"epoch": 0.0599734943423654,
|
| 85029 |
+
"grad_norm": 8.6875,
|
| 85030 |
+
"learning_rate": 0.000490044828271868,
|
| 85031 |
+
"loss": 14.7677,
|
| 85032 |
+
"step": 121460
|
| 85033 |
+
},
|
| 85034 |
+
{
|
| 85035 |
+
"epoch": 0.059978432058020135,
|
| 85036 |
+
"grad_norm": 8.375,
|
| 85037 |
+
"learning_rate": 0.0004900440052515282,
|
| 85038 |
+
"loss": 14.7085,
|
| 85039 |
+
"step": 121470
|
| 85040 |
+
},
|
| 85041 |
+
{
|
| 85042 |
+
"epoch": 0.05998336977367486,
|
| 85043 |
+
"grad_norm": 44.5,
|
| 85044 |
+
"learning_rate": 0.0004900431822311884,
|
| 85045 |
+
"loss": 14.7554,
|
| 85046 |
+
"step": 121480
|
| 85047 |
+
},
|
| 85048 |
+
{
|
| 85049 |
+
"epoch": 0.059988307489329595,
|
| 85050 |
+
"grad_norm": 18.875,
|
| 85051 |
+
"learning_rate": 0.0004900423592108486,
|
| 85052 |
+
"loss": 14.7442,
|
| 85053 |
+
"step": 121490
|
| 85054 |
+
},
|
| 85055 |
+
{
|
| 85056 |
+
"epoch": 0.05999324520498433,
|
| 85057 |
+
"grad_norm": 12.5625,
|
| 85058 |
+
"learning_rate": 0.0004900415361905087,
|
| 85059 |
+
"loss": 14.6697,
|
| 85060 |
+
"step": 121500
|
| 85061 |
+
},
|
| 85062 |
+
{
|
| 85063 |
+
"epoch": 0.05999818292063906,
|
| 85064 |
+
"grad_norm": 10.8125,
|
| 85065 |
+
"learning_rate": 0.000490040713170169,
|
| 85066 |
+
"loss": 14.5874,
|
| 85067 |
+
"step": 121510
|
| 85068 |
+
},
|
| 85069 |
+
{
|
| 85070 |
+
"epoch": 0.06000312063629379,
|
| 85071 |
+
"grad_norm": 7.25,
|
| 85072 |
+
"learning_rate": 0.0004900398901498292,
|
| 85073 |
+
"loss": 14.6752,
|
| 85074 |
+
"step": 121520
|
| 85075 |
+
},
|
| 85076 |
+
{
|
| 85077 |
+
"epoch": 0.06000805835194852,
|
| 85078 |
+
"grad_norm": 9.1875,
|
| 85079 |
+
"learning_rate": 0.0004900390671294894,
|
| 85080 |
+
"loss": 14.6608,
|
| 85081 |
+
"step": 121530
|
| 85082 |
+
},
|
| 85083 |
+
{
|
| 85084 |
+
"epoch": 0.060012996067603255,
|
| 85085 |
+
"grad_norm": 8.0625,
|
| 85086 |
+
"learning_rate": 0.0004900382441091496,
|
| 85087 |
+
"loss": 14.6988,
|
| 85088 |
+
"step": 121540
|
| 85089 |
+
},
|
| 85090 |
+
{
|
| 85091 |
+
"epoch": 0.06001793378325798,
|
| 85092 |
+
"grad_norm": 7.96875,
|
| 85093 |
+
"learning_rate": 0.0004900374210888098,
|
| 85094 |
+
"loss": 14.6037,
|
| 85095 |
+
"step": 121550
|
| 85096 |
+
},
|
| 85097 |
+
{
|
| 85098 |
+
"epoch": 0.060022871498912715,
|
| 85099 |
+
"grad_norm": 10.6875,
|
| 85100 |
+
"learning_rate": 0.00049003659806847,
|
| 85101 |
+
"loss": 14.7486,
|
| 85102 |
+
"step": 121560
|
| 85103 |
+
},
|
| 85104 |
+
{
|
| 85105 |
+
"epoch": 0.06002780921456745,
|
| 85106 |
+
"grad_norm": 11.0,
|
| 85107 |
+
"learning_rate": 0.0004900357750481303,
|
| 85108 |
+
"loss": 14.7613,
|
| 85109 |
+
"step": 121570
|
| 85110 |
+
},
|
| 85111 |
+
{
|
| 85112 |
+
"epoch": 0.060032746930222175,
|
| 85113 |
+
"grad_norm": 10.75,
|
| 85114 |
+
"learning_rate": 0.0004900349520277905,
|
| 85115 |
+
"loss": 14.5282,
|
| 85116 |
+
"step": 121580
|
| 85117 |
+
},
|
| 85118 |
+
{
|
| 85119 |
+
"epoch": 0.06003768464587691,
|
| 85120 |
+
"grad_norm": 9.0,
|
| 85121 |
+
"learning_rate": 0.0004900341290074506,
|
| 85122 |
+
"loss": 14.7733,
|
| 85123 |
+
"step": 121590
|
| 85124 |
+
},
|
| 85125 |
+
{
|
| 85126 |
+
"epoch": 0.06004262236153164,
|
| 85127 |
+
"grad_norm": 8.9375,
|
| 85128 |
+
"learning_rate": 0.0004900333059871108,
|
| 85129 |
+
"loss": 14.5763,
|
| 85130 |
+
"step": 121600
|
| 85131 |
+
},
|
| 85132 |
+
{
|
| 85133 |
+
"epoch": 0.06004756007718637,
|
| 85134 |
+
"grad_norm": 13.3125,
|
| 85135 |
+
"learning_rate": 0.000490032482966771,
|
| 85136 |
+
"loss": 14.6428,
|
| 85137 |
+
"step": 121610
|
| 85138 |
+
},
|
| 85139 |
+
{
|
| 85140 |
+
"epoch": 0.0600524977928411,
|
| 85141 |
+
"grad_norm": 8.8125,
|
| 85142 |
+
"learning_rate": 0.0004900316599464313,
|
| 85143 |
+
"loss": 14.6828,
|
| 85144 |
+
"step": 121620
|
| 85145 |
+
},
|
| 85146 |
+
{
|
| 85147 |
+
"epoch": 0.060057435508495835,
|
| 85148 |
+
"grad_norm": 61.75,
|
| 85149 |
+
"learning_rate": 0.0004900308369260915,
|
| 85150 |
+
"loss": 14.5284,
|
| 85151 |
+
"step": 121630
|
| 85152 |
+
},
|
| 85153 |
+
{
|
| 85154 |
+
"epoch": 0.06006237322415056,
|
| 85155 |
+
"grad_norm": 18.625,
|
| 85156 |
+
"learning_rate": 0.0004900300139057517,
|
| 85157 |
+
"loss": 14.5693,
|
| 85158 |
+
"step": 121640
|
| 85159 |
+
},
|
| 85160 |
+
{
|
| 85161 |
+
"epoch": 0.060067310939805295,
|
| 85162 |
+
"grad_norm": 9.0,
|
| 85163 |
+
"learning_rate": 0.0004900291908854119,
|
| 85164 |
+
"loss": 14.7845,
|
| 85165 |
+
"step": 121650
|
| 85166 |
+
},
|
| 85167 |
+
{
|
| 85168 |
+
"epoch": 0.06007224865546003,
|
| 85169 |
+
"grad_norm": 8.6875,
|
| 85170 |
+
"learning_rate": 0.000490028367865072,
|
| 85171 |
+
"loss": 14.7955,
|
| 85172 |
+
"step": 121660
|
| 85173 |
+
},
|
| 85174 |
+
{
|
| 85175 |
+
"epoch": 0.06007718637111476,
|
| 85176 |
+
"grad_norm": 8.6875,
|
| 85177 |
+
"learning_rate": 0.0004900275448447322,
|
| 85178 |
+
"loss": 14.6117,
|
| 85179 |
+
"step": 121670
|
| 85180 |
+
},
|
| 85181 |
+
{
|
| 85182 |
+
"epoch": 0.06008212408676949,
|
| 85183 |
+
"grad_norm": 9.375,
|
| 85184 |
+
"learning_rate": 0.0004900267218243925,
|
| 85185 |
+
"loss": 14.7178,
|
| 85186 |
+
"step": 121680
|
| 85187 |
+
},
|
| 85188 |
+
{
|
| 85189 |
+
"epoch": 0.06008706180242422,
|
| 85190 |
+
"grad_norm": 9.3125,
|
| 85191 |
+
"learning_rate": 0.0004900258988040527,
|
| 85192 |
+
"loss": 14.5357,
|
| 85193 |
+
"step": 121690
|
| 85194 |
+
},
|
| 85195 |
+
{
|
| 85196 |
+
"epoch": 0.060091999518078955,
|
| 85197 |
+
"grad_norm": 30.625,
|
| 85198 |
+
"learning_rate": 0.0004900250757837129,
|
| 85199 |
+
"loss": 14.688,
|
| 85200 |
+
"step": 121700
|
| 85201 |
+
},
|
| 85202 |
+
{
|
| 85203 |
+
"epoch": 0.06009693723373368,
|
| 85204 |
+
"grad_norm": 9.0,
|
| 85205 |
+
"learning_rate": 0.0004900242527633731,
|
| 85206 |
+
"loss": 14.6058,
|
| 85207 |
+
"step": 121710
|
| 85208 |
+
},
|
| 85209 |
+
{
|
| 85210 |
+
"epoch": 0.060101874949388415,
|
| 85211 |
+
"grad_norm": 12.0,
|
| 85212 |
+
"learning_rate": 0.0004900234297430333,
|
| 85213 |
+
"loss": 14.7661,
|
| 85214 |
+
"step": 121720
|
| 85215 |
+
},
|
| 85216 |
+
{
|
| 85217 |
+
"epoch": 0.06010681266504315,
|
| 85218 |
+
"grad_norm": 9.4375,
|
| 85219 |
+
"learning_rate": 0.0004900226067226935,
|
| 85220 |
+
"loss": 14.5204,
|
| 85221 |
+
"step": 121730
|
| 85222 |
+
},
|
| 85223 |
+
{
|
| 85224 |
+
"epoch": 0.060111750380697875,
|
| 85225 |
+
"grad_norm": 8.125,
|
| 85226 |
+
"learning_rate": 0.0004900217837023538,
|
| 85227 |
+
"loss": 14.5525,
|
| 85228 |
+
"step": 121740
|
| 85229 |
+
},
|
| 85230 |
+
{
|
| 85231 |
+
"epoch": 0.06011668809635261,
|
| 85232 |
+
"grad_norm": 8.25,
|
| 85233 |
+
"learning_rate": 0.000490020960682014,
|
| 85234 |
+
"loss": 14.7768,
|
| 85235 |
+
"step": 121750
|
| 85236 |
+
},
|
| 85237 |
+
{
|
| 85238 |
+
"epoch": 0.06012162581200734,
|
| 85239 |
+
"grad_norm": 9.75,
|
| 85240 |
+
"learning_rate": 0.0004900201376616741,
|
| 85241 |
+
"loss": 14.5709,
|
| 85242 |
+
"step": 121760
|
| 85243 |
+
},
|
| 85244 |
+
{
|
| 85245 |
+
"epoch": 0.06012656352766207,
|
| 85246 |
+
"grad_norm": 10.5,
|
| 85247 |
+
"learning_rate": 0.0004900193146413343,
|
| 85248 |
+
"loss": 14.5302,
|
| 85249 |
+
"step": 121770
|
| 85250 |
+
},
|
| 85251 |
+
{
|
| 85252 |
+
"epoch": 0.0601315012433168,
|
| 85253 |
+
"grad_norm": 20.375,
|
| 85254 |
+
"learning_rate": 0.0004900184916209945,
|
| 85255 |
+
"loss": 14.6281,
|
| 85256 |
+
"step": 121780
|
| 85257 |
+
},
|
| 85258 |
+
{
|
| 85259 |
+
"epoch": 0.060136438958971535,
|
| 85260 |
+
"grad_norm": 9.3125,
|
| 85261 |
+
"learning_rate": 0.0004900176686006547,
|
| 85262 |
+
"loss": 14.6019,
|
| 85263 |
+
"step": 121790
|
| 85264 |
+
},
|
| 85265 |
+
{
|
| 85266 |
+
"epoch": 0.06014137667462626,
|
| 85267 |
+
"grad_norm": 8.75,
|
| 85268 |
+
"learning_rate": 0.000490016845580315,
|
| 85269 |
+
"loss": 14.6996,
|
| 85270 |
+
"step": 121800
|
| 85271 |
+
},
|
| 85272 |
+
{
|
| 85273 |
+
"epoch": 0.060146314390280994,
|
| 85274 |
+
"grad_norm": 9.375,
|
| 85275 |
+
"learning_rate": 0.0004900160225599752,
|
| 85276 |
+
"loss": 14.6491,
|
| 85277 |
+
"step": 121810
|
| 85278 |
+
},
|
| 85279 |
+
{
|
| 85280 |
+
"epoch": 0.06015125210593573,
|
| 85281 |
+
"grad_norm": 12.0,
|
| 85282 |
+
"learning_rate": 0.0004900151995396354,
|
| 85283 |
+
"loss": 14.7936,
|
| 85284 |
+
"step": 121820
|
| 85285 |
+
},
|
| 85286 |
+
{
|
| 85287 |
+
"epoch": 0.06015618982159046,
|
| 85288 |
+
"grad_norm": 8.8125,
|
| 85289 |
+
"learning_rate": 0.0004900143765192955,
|
| 85290 |
+
"loss": 14.7424,
|
| 85291 |
+
"step": 121830
|
| 85292 |
+
},
|
| 85293 |
+
{
|
| 85294 |
+
"epoch": 0.06016112753724519,
|
| 85295 |
+
"grad_norm": 15.5,
|
| 85296 |
+
"learning_rate": 0.0004900135534989557,
|
| 85297 |
+
"loss": 14.6962,
|
| 85298 |
+
"step": 121840
|
| 85299 |
+
},
|
| 85300 |
+
{
|
| 85301 |
+
"epoch": 0.06016606525289992,
|
| 85302 |
+
"grad_norm": 9.0,
|
| 85303 |
+
"learning_rate": 0.0004900127304786159,
|
| 85304 |
+
"loss": 14.6642,
|
| 85305 |
+
"step": 121850
|
| 85306 |
+
},
|
| 85307 |
+
{
|
| 85308 |
+
"epoch": 0.060171002968554654,
|
| 85309 |
+
"grad_norm": 10.1875,
|
| 85310 |
+
"learning_rate": 0.0004900119074582762,
|
| 85311 |
+
"loss": 14.7013,
|
| 85312 |
+
"step": 121860
|
| 85313 |
+
},
|
| 85314 |
+
{
|
| 85315 |
+
"epoch": 0.06017594068420938,
|
| 85316 |
+
"grad_norm": 10.375,
|
| 85317 |
+
"learning_rate": 0.0004900110844379364,
|
| 85318 |
+
"loss": 14.7083,
|
| 85319 |
+
"step": 121870
|
| 85320 |
+
},
|
| 85321 |
+
{
|
| 85322 |
+
"epoch": 0.060180878399864114,
|
| 85323 |
+
"grad_norm": 8.4375,
|
| 85324 |
+
"learning_rate": 0.0004900102614175966,
|
| 85325 |
+
"loss": 14.4611,
|
| 85326 |
+
"step": 121880
|
| 85327 |
+
},
|
| 85328 |
+
{
|
| 85329 |
+
"epoch": 0.06018581611551885,
|
| 85330 |
+
"grad_norm": 8.5625,
|
| 85331 |
+
"learning_rate": 0.0004900094383972568,
|
| 85332 |
+
"loss": 14.6936,
|
| 85333 |
+
"step": 121890
|
| 85334 |
+
},
|
| 85335 |
+
{
|
| 85336 |
+
"epoch": 0.060190753831173574,
|
| 85337 |
+
"grad_norm": 26.625,
|
| 85338 |
+
"learning_rate": 0.000490008615376917,
|
| 85339 |
+
"loss": 14.6797,
|
| 85340 |
+
"step": 121900
|
| 85341 |
+
},
|
| 85342 |
+
{
|
| 85343 |
+
"epoch": 0.06019569154682831,
|
| 85344 |
+
"grad_norm": 9.0625,
|
| 85345 |
+
"learning_rate": 0.0004900077923565773,
|
| 85346 |
+
"loss": 14.6074,
|
| 85347 |
+
"step": 121910
|
| 85348 |
+
},
|
| 85349 |
+
{
|
| 85350 |
+
"epoch": 0.06020062926248304,
|
| 85351 |
+
"grad_norm": 10.4375,
|
| 85352 |
+
"learning_rate": 0.0004900069693362374,
|
| 85353 |
+
"loss": 14.6961,
|
| 85354 |
+
"step": 121920
|
| 85355 |
+
},
|
| 85356 |
+
{
|
| 85357 |
+
"epoch": 0.06020556697813777,
|
| 85358 |
+
"grad_norm": 9.0625,
|
| 85359 |
+
"learning_rate": 0.0004900061463158976,
|
| 85360 |
+
"loss": 14.7742,
|
| 85361 |
+
"step": 121930
|
| 85362 |
+
},
|
| 85363 |
+
{
|
| 85364 |
+
"epoch": 0.0602105046937925,
|
| 85365 |
+
"grad_norm": 8.25,
|
| 85366 |
+
"learning_rate": 0.0004900053232955578,
|
| 85367 |
+
"loss": 14.828,
|
| 85368 |
+
"step": 121940
|
| 85369 |
+
},
|
| 85370 |
+
{
|
| 85371 |
+
"epoch": 0.060215442409447234,
|
| 85372 |
+
"grad_norm": 16.375,
|
| 85373 |
+
"learning_rate": 0.000490004500275218,
|
| 85374 |
+
"loss": 14.4311,
|
| 85375 |
+
"step": 121950
|
| 85376 |
+
},
|
| 85377 |
+
{
|
| 85378 |
+
"epoch": 0.06022038012510196,
|
| 85379 |
+
"grad_norm": 8.0625,
|
| 85380 |
+
"learning_rate": 0.0004900036772548782,
|
| 85381 |
+
"loss": 14.4625,
|
| 85382 |
+
"step": 121960
|
| 85383 |
+
},
|
| 85384 |
+
{
|
| 85385 |
+
"epoch": 0.060225317840756694,
|
| 85386 |
+
"grad_norm": 9.5,
|
| 85387 |
+
"learning_rate": 0.0004900028542345385,
|
| 85388 |
+
"loss": 14.7465,
|
| 85389 |
+
"step": 121970
|
| 85390 |
+
},
|
| 85391 |
+
{
|
| 85392 |
+
"epoch": 0.06023025555641143,
|
| 85393 |
+
"grad_norm": 7.78125,
|
| 85394 |
+
"learning_rate": 0.0004900020312141987,
|
| 85395 |
+
"loss": 14.4267,
|
| 85396 |
+
"step": 121980
|
| 85397 |
+
},
|
| 85398 |
+
{
|
| 85399 |
+
"epoch": 0.06023519327206616,
|
| 85400 |
+
"grad_norm": 7.8125,
|
| 85401 |
+
"learning_rate": 0.0004900012081938589,
|
| 85402 |
+
"loss": 14.7071,
|
| 85403 |
+
"step": 121990
|
| 85404 |
+
},
|
| 85405 |
+
{
|
| 85406 |
+
"epoch": 0.06024013098772089,
|
| 85407 |
+
"grad_norm": 10.0,
|
| 85408 |
+
"learning_rate": 0.000490000385173519,
|
| 85409 |
+
"loss": 14.6893,
|
| 85410 |
+
"step": 122000
|
| 85411 |
}
|
| 85412 |
],
|
| 85413 |
"logging_steps": 10,
|
|
|
|
| 85427 |
"attributes": {}
|
| 85428 |
}
|
| 85429 |
},
|
| 85430 |
+
"total_flos": 2.6015940864424647e+20,
|
| 85431 |
"train_batch_size": 16,
|
| 85432 |
"trial_name": null,
|
| 85433 |
"trial_params": null
|