Upload folder using huggingface_hub

- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +1402 -2
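
The commit title names the `huggingface_hub` upload path. A minimal sketch of such an upload (the local folder name and repo id below are hypothetical, not taken from this commit):

```python
# Hedged sketch: folder_path and repo_id are placeholders, not from this commit.
from huggingface_hub import HfApi

api = HfApi()  # authenticates with the token stored by `huggingface-cli login`
api.upload_folder(
    folder_path="./checkpoint-16000",  # hypothetical local checkpoint folder
    repo_id="user/repo",               # hypothetical target repository
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```

`upload_folder` stages every file in the folder as a single commit; large binaries go to the Hub's LFS storage while the repository itself records small pointer stubs, which is exactly the shape of the diffs below.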
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1da777fdc2f589e2fd22bba7e2fef4a2ed107c1214679527001ae57564ee62c4
 size 1909053417
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5c9fd6559fcb670e2401c547414ff82836bdc646bb72642a2eec3867cef32b65
 size 969281034
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f85273902d9a34de6c102fe53d722bd3322efbe0f1ea96148f22d32665f5895c
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9b4faebb8a687e5379473d389ae7f00051e86251b0e805e330f53a52ba86cf88
 size 1064
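
Each of the four binary files above is stored through Git LFS, so the repository tracks only a three-line pointer stub (spec version, `oid sha256:` content hash, `size` in bytes); the diffs are diffs of those stubs, not of the tensors. A small parsing sketch (the file path is an assumption):

```python
# Hedged sketch of reading a Git LFS pointer stub like the ones diffed above.
def read_lfs_pointer(path: str) -> dict:
    """Parse the 'key value' lines of a Git LFS pointer file."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    fields["size"] = int(fields["size"])  # byte size of the real payload
    return fields

# e.g. read_lfs_pointer("scheduler.pt") ->
# {"version": "https://git-lfs.github.com/spec/v1",
#  "oid": "sha256:9b4faebb...", "size": 1064}
```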
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.13235719899077636,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 16000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -9807,6 +9807,1406 @@
     "learning_rate": 9.99474835923667e-06,
     "loss": 117.067,
     "step": 14000
+    },
+    {
+      "epoch": 0.11589527236629855,
+      "grad_norm": 870.8054809570312,
+      "learning_rate": 9.994683536271437e-06,
+      "loss": 177.513,
+      "step": 14010
+    },
+    {
+      "epoch": 0.11597799561566778,
+      "grad_norm": 1938.4117431640625,
+      "learning_rate": 9.994618315902161e-06,
+      "loss": 147.8295,
+      "step": 14020
+    },
+    {
+      "epoch": 0.11606071886503702,
+      "grad_norm": 1145.3619384765625,
+      "learning_rate": 9.994552698134023e-06,
+      "loss": 126.2492,
+      "step": 14030
+    },
+    {
+      "epoch": 0.11614344211440625,
+      "grad_norm": 704.4757080078125,
+      "learning_rate": 9.994486682972253e-06,
+      "loss": 183.3489,
+      "step": 14040
+    },
+    {
+      "epoch": 0.11622616536377549,
+      "grad_norm": 799.8057250976562,
+      "learning_rate": 9.994420270422096e-06,
+      "loss": 155.8286,
+      "step": 14050
+    },
+    {
+      "epoch": 0.11630888861314473,
+      "grad_norm": 2168.39794921875,
+      "learning_rate": 9.994353460488842e-06,
+      "loss": 165.6206,
+      "step": 14060
+    },
+    {
+      "epoch": 0.11639161186251396,
+      "grad_norm": 1048.3438720703125,
+      "learning_rate": 9.994286253177803e-06,
+      "loss": 196.4472,
+      "step": 14070
+    },
+    {
+      "epoch": 0.1164743351118832,
+      "grad_norm": 1240.358642578125,
+      "learning_rate": 9.994218648494327e-06,
+      "loss": 169.1644,
+      "step": 14080
+    },
+    {
+      "epoch": 0.11655705836125244,
+      "grad_norm": 1450.99169921875,
+      "learning_rate": 9.994150646443793e-06,
+      "loss": 119.286,
+      "step": 14090
+    },
+    {
+      "epoch": 0.11663978161062166,
+      "grad_norm": 1026.7149658203125,
+      "learning_rate": 9.994082247031613e-06,
+      "loss": 166.7578,
+      "step": 14100
+    },
+    {
+      "epoch": 0.1167225048599909,
+      "grad_norm": 1081.6656494140625,
+      "learning_rate": 9.99401345026323e-06,
+      "loss": 141.0757,
+      "step": 14110
+    },
+    {
+      "epoch": 0.11680522810936013,
+      "grad_norm": 746.9979248046875,
+      "learning_rate": 9.993944256144115e-06,
+      "loss": 124.9759,
+      "step": 14120
+    },
+    {
+      "epoch": 0.11688795135872937,
+      "grad_norm": 891.9210815429688,
+      "learning_rate": 9.993874664679774e-06,
+      "loss": 150.3685,
+      "step": 14130
+    },
+    {
+      "epoch": 0.11697067460809861,
+      "grad_norm": 1401.4002685546875,
+      "learning_rate": 9.993804675875744e-06,
+      "loss": 168.8493,
+      "step": 14140
+    },
+    {
+      "epoch": 0.11705339785746784,
+      "grad_norm": 1588.7640380859375,
+      "learning_rate": 9.993734289737596e-06,
+      "loss": 141.4464,
+      "step": 14150
+    },
+    {
+      "epoch": 0.11713612110683708,
+      "grad_norm": 1859.66552734375,
+      "learning_rate": 9.993663506270928e-06,
+      "loss": 162.024,
+      "step": 14160
+    },
+    {
+      "epoch": 0.11721884435620632,
+      "grad_norm": 1133.1839599609375,
+      "learning_rate": 9.993592325481373e-06,
+      "loss": 166.6096,
+      "step": 14170
+    },
+    {
+      "epoch": 0.11730156760557554,
+      "grad_norm": 1811.849365234375,
+      "learning_rate": 9.993520747374594e-06,
+      "loss": 127.2197,
+      "step": 14180
+    },
+    {
+      "epoch": 0.11738429085494478,
+      "grad_norm": 909.2362060546875,
+      "learning_rate": 9.993448771956285e-06,
+      "loss": 189.4919,
+      "step": 14190
+    },
+    {
+      "epoch": 0.11746701410431402,
+      "grad_norm": 1350.44140625,
+      "learning_rate": 9.993376399232175e-06,
+      "loss": 142.4382,
+      "step": 14200
+    },
+    {
+      "epoch": 0.11754973735368325,
+      "grad_norm": 1765.2679443359375,
+      "learning_rate": 9.993303629208023e-06,
+      "loss": 148.8411,
+      "step": 14210
+    },
+    {
+      "epoch": 0.11763246060305249,
+      "grad_norm": 2343.818359375,
+      "learning_rate": 9.993230461889616e-06,
+      "loss": 212.7168,
+      "step": 14220
+    },
+    {
+      "epoch": 0.11771518385242172,
+      "grad_norm": 683.275146484375,
+      "learning_rate": 9.993156897282776e-06,
+      "loss": 148.4446,
+      "step": 14230
+    },
+    {
+      "epoch": 0.11779790710179096,
+      "grad_norm": 998.6349487304688,
+      "learning_rate": 9.99308293539336e-06,
+      "loss": 117.4103,
+      "step": 14240
+    },
+    {
+      "epoch": 0.1178806303511602,
+      "grad_norm": 823.5191040039062,
+      "learning_rate": 9.993008576227248e-06,
+      "loss": 130.8048,
+      "step": 14250
+    },
+    {
+      "epoch": 0.11796335360052942,
+      "grad_norm": 1420.8035888671875,
+      "learning_rate": 9.992933819790358e-06,
+      "loss": 163.5295,
+      "step": 14260
+    },
+    {
+      "epoch": 0.11804607684989867,
+      "grad_norm": 1167.037109375,
+      "learning_rate": 9.992858666088638e-06,
+      "loss": 164.6194,
+      "step": 14270
+    },
+    {
+      "epoch": 0.1181288000992679,
+      "grad_norm": 1515.985107421875,
+      "learning_rate": 9.992783115128072e-06,
+      "loss": 163.406,
+      "step": 14280
+    },
+    {
+      "epoch": 0.11821152334863713,
+      "grad_norm": 1549.8917236328125,
+      "learning_rate": 9.992707166914662e-06,
+      "loss": 168.7726,
+      "step": 14290
+    },
+    {
+      "epoch": 0.11829424659800637,
+      "grad_norm": 1190.7861328125,
+      "learning_rate": 9.992630821454458e-06,
+      "loss": 140.9276,
+      "step": 14300
+    },
+    {
+      "epoch": 0.1183769698473756,
+      "grad_norm": 1568.7037353515625,
+      "learning_rate": 9.992554078753534e-06,
+      "loss": 147.5554,
+      "step": 14310
+    },
+    {
+      "epoch": 0.11845969309674484,
+      "grad_norm": 1000.02880859375,
+      "learning_rate": 9.992476938817994e-06,
+      "loss": 180.1213,
+      "step": 14320
+    },
+    {
+      "epoch": 0.11854241634611408,
+      "grad_norm": 1296.5947265625,
+      "learning_rate": 9.992399401653976e-06,
+      "loss": 137.781,
+      "step": 14330
+    },
+    {
+      "epoch": 0.1186251395954833,
+      "grad_norm": 1144.0504150390625,
+      "learning_rate": 9.99232146726765e-06,
+      "loss": 140.0204,
+      "step": 14340
+    },
+    {
+      "epoch": 0.11870786284485255,
+      "grad_norm": 1281.876708984375,
+      "learning_rate": 9.992243135665217e-06,
+      "loss": 154.8919,
+      "step": 14350
+    },
+    {
+      "epoch": 0.11879058609422179,
+      "grad_norm": 871.8610229492188,
+      "learning_rate": 9.992164406852908e-06,
+      "loss": 186.1516,
+      "step": 14360
+    },
+    {
+      "epoch": 0.11887330934359101,
+      "grad_norm": 1534.3536376953125,
+      "learning_rate": 9.992085280836988e-06,
+      "loss": 160.6092,
+      "step": 14370
+    },
+    {
+      "epoch": 0.11895603259296025,
+      "grad_norm": 988.0948486328125,
+      "learning_rate": 9.992005757623753e-06,
+      "loss": 203.4977,
+      "step": 14380
+    },
+    {
+      "epoch": 0.11903875584232948,
+      "grad_norm": 1357.1668701171875,
+      "learning_rate": 9.991925837219532e-06,
+      "loss": 160.5042,
+      "step": 14390
+    },
+    {
+      "epoch": 0.11912147909169872,
+      "grad_norm": 636.4329223632812,
+      "learning_rate": 9.991845519630679e-06,
+      "loss": 137.9073,
+      "step": 14400
+    },
+    {
+      "epoch": 0.11920420234106796,
+      "grad_norm": 1027.252197265625,
+      "learning_rate": 9.991764804863588e-06,
+      "loss": 122.9028,
+      "step": 14410
+    },
+    {
+      "epoch": 0.11928692559043719,
+      "grad_norm": 2356.058837890625,
+      "learning_rate": 9.991683692924682e-06,
+      "loss": 155.1582,
+      "step": 14420
+    },
+    {
+      "epoch": 0.11936964883980643,
+      "grad_norm": 743.142578125,
+      "learning_rate": 9.991602183820412e-06,
+      "loss": 142.9998,
+      "step": 14430
+    },
+    {
+      "epoch": 0.11945237208917567,
+      "grad_norm": 1039.77978515625,
+      "learning_rate": 9.991520277557266e-06,
+      "loss": 156.9646,
+      "step": 14440
+    },
+    {
+      "epoch": 0.1195350953385449,
+      "grad_norm": 1624.250732421875,
+      "learning_rate": 9.991437974141759e-06,
+      "loss": 165.5059,
+      "step": 14450
+    },
+    {
+      "epoch": 0.11961781858791413,
+      "grad_norm": 589.4429321289062,
+      "learning_rate": 9.99135527358044e-06,
+      "loss": 119.63,
+      "step": 14460
+    },
+    {
+      "epoch": 0.11970054183728338,
+      "grad_norm": 1199.8302001953125,
+      "learning_rate": 9.991272175879888e-06,
+      "loss": 175.7935,
+      "step": 14470
+    },
+    {
+      "epoch": 0.1197832650866526,
+      "grad_norm": 1233.6771240234375,
+      "learning_rate": 9.991188681046718e-06,
+      "loss": 192.3081,
+      "step": 14480
+    },
+    {
+      "epoch": 0.11986598833602184,
+      "grad_norm": 1209.5980224609375,
+      "learning_rate": 9.991104789087568e-06,
+      "loss": 139.1357,
+      "step": 14490
+    },
+    {
+      "epoch": 0.11994871158539107,
+      "grad_norm": 1937.0008544921875,
+      "learning_rate": 9.991020500009118e-06,
+      "loss": 138.8762,
+      "step": 14500
+    },
+    {
+      "epoch": 0.12003143483476031,
+      "grad_norm": 545.6674194335938,
+      "learning_rate": 9.990935813818073e-06,
+      "loss": 139.6014,
+      "step": 14510
+    },
+    {
+      "epoch": 0.12011415808412955,
+      "grad_norm": 650.5000610351562,
+      "learning_rate": 9.99085073052117e-06,
+      "loss": 195.5592,
+      "step": 14520
+    },
+    {
+      "epoch": 0.12019688133349878,
+      "grad_norm": 566.500732421875,
+      "learning_rate": 9.990765250125179e-06,
+      "loss": 154.2413,
+      "step": 14530
+    },
+    {
+      "epoch": 0.12027960458286802,
+      "grad_norm": 1750.378662109375,
+      "learning_rate": 9.990679372636902e-06,
+      "loss": 161.8778,
+      "step": 14540
+    },
+    {
+      "epoch": 0.12036232783223726,
+      "grad_norm": 1349.2432861328125,
+      "learning_rate": 9.99059309806317e-06,
+      "loss": 172.9573,
+      "step": 14550
+    },
+    {
+      "epoch": 0.12044505108160648,
+      "grad_norm": 975.1179809570312,
+      "learning_rate": 9.990506426410851e-06,
+      "loss": 128.1013,
+      "step": 14560
+    },
+    {
+      "epoch": 0.12052777433097572,
+      "grad_norm": 3406.892333984375,
+      "learning_rate": 9.990419357686839e-06,
+      "loss": 177.5993,
+      "step": 14570
+    },
+    {
+      "epoch": 0.12061049758034495,
+      "grad_norm": 428.1646423339844,
+      "learning_rate": 9.99033189189806e-06,
+      "loss": 128.9519,
+      "step": 14580
+    },
+    {
+      "epoch": 0.12069322082971419,
+      "grad_norm": 996.7293701171875,
+      "learning_rate": 9.990244029051475e-06,
+      "loss": 149.3833,
+      "step": 14590
+    },
+    {
+      "epoch": 0.12077594407908343,
+      "grad_norm": 1458.9307861328125,
+      "learning_rate": 9.990155769154077e-06,
+      "loss": 160.3518,
+      "step": 14600
+    },
+    {
+      "epoch": 0.12085866732845266,
+      "grad_norm": 714.3155517578125,
+      "learning_rate": 9.990067112212884e-06,
+      "loss": 128.5733,
+      "step": 14610
+    },
+    {
+      "epoch": 0.1209413905778219,
+      "grad_norm": 629.7801513671875,
+      "learning_rate": 9.989978058234952e-06,
+      "loss": 141.1231,
+      "step": 14620
+    },
+    {
+      "epoch": 0.12102411382719114,
+      "grad_norm": 1056.1544189453125,
+      "learning_rate": 9.989888607227369e-06,
+      "loss": 173.8705,
+      "step": 14630
+    },
+    {
+      "epoch": 0.12110683707656036,
+      "grad_norm": 1272.472412109375,
+      "learning_rate": 9.989798759197247e-06,
+      "loss": 146.9385,
+      "step": 14640
+    },
+    {
+      "epoch": 0.1211895603259296,
+      "grad_norm": 1054.3629150390625,
+      "learning_rate": 9.989708514151739e-06,
+      "loss": 164.1719,
+      "step": 14650
+    },
+    {
+      "epoch": 0.12127228357529883,
+      "grad_norm": 659.7613525390625,
+      "learning_rate": 9.989617872098026e-06,
+      "loss": 149.6539,
+      "step": 14660
+    },
+    {
+      "epoch": 0.12135500682466807,
+      "grad_norm": 815.8479614257812,
+      "learning_rate": 9.989526833043316e-06,
+      "loss": 140.1702,
+      "step": 14670
+    },
+    {
+      "epoch": 0.12143773007403731,
+      "grad_norm": 1148.9129638671875,
+      "learning_rate": 9.989435396994856e-06,
+      "loss": 125.2471,
+      "step": 14680
+    },
+    {
+      "epoch": 0.12152045332340654,
+      "grad_norm": 3006.224609375,
+      "learning_rate": 9.989343563959919e-06,
+      "loss": 150.3076,
+      "step": 14690
+    },
+    {
+      "epoch": 0.12160317657277578,
+      "grad_norm": 1365.89892578125,
+      "learning_rate": 9.989251333945813e-06,
+      "loss": 179.2145,
+      "step": 14700
+    },
+    {
+      "epoch": 0.12168589982214502,
+      "grad_norm": 982.8682861328125,
+      "learning_rate": 9.989158706959875e-06,
+      "loss": 137.9394,
+      "step": 14710
+    },
+    {
+      "epoch": 0.12176862307151425,
+      "grad_norm": 1103.151123046875,
+      "learning_rate": 9.989065683009477e-06,
+      "loss": 150.3043,
+      "step": 14720
+    },
+    {
+      "epoch": 0.12185134632088349,
+      "grad_norm": 1068.439208984375,
+      "learning_rate": 9.988972262102018e-06,
+      "loss": 115.0475,
+      "step": 14730
+    },
+    {
+      "epoch": 0.12193406957025273,
+      "grad_norm": 1475.9112548828125,
+      "learning_rate": 9.988878444244937e-06,
+      "loss": 162.3183,
+      "step": 14740
+    },
+    {
+      "epoch": 0.12201679281962195,
+      "grad_norm": 1475.7916259765625,
+      "learning_rate": 9.988784229445689e-06,
+      "loss": 132.9056,
+      "step": 14750
+    },
+    {
+      "epoch": 0.12209951606899119,
+      "grad_norm": 1580.335205078125,
+      "learning_rate": 9.988689617711777e-06,
+      "loss": 180.2133,
+      "step": 14760
+    },
+    {
+      "epoch": 0.12218223931836042,
+      "grad_norm": 1742.8638916015625,
+      "learning_rate": 9.988594609050726e-06,
+      "loss": 170.6644,
+      "step": 14770
+    },
+    {
+      "epoch": 0.12226496256772966,
+      "grad_norm": 778.4093017578125,
+      "learning_rate": 9.988499203470097e-06,
+      "loss": 163.6835,
+      "step": 14780
+    },
+    {
+      "epoch": 0.1223476858170989,
+      "grad_norm": 908.4758911132812,
+      "learning_rate": 9.988403400977482e-06,
+      "loss": 143.079,
+      "step": 14790
+    },
+    {
+      "epoch": 0.12243040906646813,
+      "grad_norm": 1540.624755859375,
+      "learning_rate": 9.9883072015805e-06,
+      "loss": 160.3763,
+      "step": 14800
+    },
+    {
+      "epoch": 0.12251313231583737,
+      "grad_norm": 919.294677734375,
+      "learning_rate": 9.98821060528681e-06,
+      "loss": 165.4283,
+      "step": 14810
+    },
+    {
+      "epoch": 0.12259585556520661,
+      "grad_norm": 865.2339477539062,
+      "learning_rate": 9.988113612104093e-06,
+      "loss": 128.951,
+      "step": 14820
+    },
+    {
+      "epoch": 0.12267857881457583,
+      "grad_norm": 2098.492919921875,
+      "learning_rate": 9.988016222040067e-06,
+      "loss": 151.1649,
+      "step": 14830
+    },
+    {
+      "epoch": 0.12276130206394507,
+      "grad_norm": 847.621337890625,
+      "learning_rate": 9.987918435102484e-06,
+      "loss": 121.6645,
+      "step": 14840
+    },
+    {
+      "epoch": 0.1228440253133143,
+      "grad_norm": 1472.7208251953125,
+      "learning_rate": 9.987820251299121e-06,
+      "loss": 140.8588,
+      "step": 14850
+    },
+    {
+      "epoch": 0.12292674856268354,
+      "grad_norm": 1310.5726318359375,
+      "learning_rate": 9.987721670637794e-06,
+      "loss": 132.6207,
+      "step": 14860
+    },
+    {
+      "epoch": 0.12300947181205278,
+      "grad_norm": 788.9578247070312,
+      "learning_rate": 9.987622693126342e-06,
+      "loss": 139.2334,
+      "step": 14870
+    },
+    {
+      "epoch": 0.12309219506142201,
+      "grad_norm": 1761.3287353515625,
+      "learning_rate": 9.987523318772644e-06,
+      "loss": 156.3363,
+      "step": 14880
+    },
+    {
+      "epoch": 0.12317491831079125,
+      "grad_norm": 1024.44140625,
+      "learning_rate": 9.987423547584605e-06,
+      "loss": 167.0266,
+      "step": 14890
+    },
+    {
+      "epoch": 0.12325764156016049,
+      "grad_norm": 891.505126953125,
+      "learning_rate": 9.987323379570161e-06,
+      "loss": 144.4436,
+      "step": 14900
+    },
+    {
+      "epoch": 0.12334036480952972,
+      "grad_norm": 1373.43359375,
+      "learning_rate": 9.987222814737287e-06,
+      "loss": 139.1032,
+      "step": 14910
+    },
+    {
+      "epoch": 0.12342308805889896,
+      "grad_norm": 2005.6689453125,
+      "learning_rate": 9.987121853093982e-06,
+      "loss": 179.2018,
+      "step": 14920
+    },
+    {
+      "epoch": 0.12350581130826818,
+      "grad_norm": 1276.0216064453125,
+      "learning_rate": 9.987020494648279e-06,
+      "loss": 157.102,
+      "step": 14930
+    },
+    {
+      "epoch": 0.12358853455763742,
+      "grad_norm": 1294.2474365234375,
+      "learning_rate": 9.986918739408241e-06,
+      "loss": 176.7196,
+      "step": 14940
+    },
+    {
+      "epoch": 0.12367125780700666,
+      "grad_norm": 1715.0291748046875,
+      "learning_rate": 9.986816587381966e-06,
+      "loss": 150.2139,
+      "step": 14950
+    },
+    {
+      "epoch": 0.12375398105637589,
+      "grad_norm": 1020.9821166992188,
+      "learning_rate": 9.986714038577582e-06,
+      "loss": 145.5669,
+      "step": 14960
+    },
+    {
+      "epoch": 0.12383670430574513,
+      "grad_norm": 969.8027954101562,
+      "learning_rate": 9.986611093003249e-06,
+      "loss": 129.0563,
+      "step": 14970
+    },
+    {
+      "epoch": 0.12391942755511437,
+      "grad_norm": 1188.594482421875,
+      "learning_rate": 9.986507750667157e-06,
+      "loss": 130.371,
+      "step": 14980
+    },
+    {
+      "epoch": 0.1240021508044836,
+      "grad_norm": 2070.416015625,
+      "learning_rate": 9.986404011577525e-06,
+      "loss": 164.5774,
+      "step": 14990
+    },
+    {
+      "epoch": 0.12408487405385284,
+      "grad_norm": 1122.52587890625,
+      "learning_rate": 9.986299875742612e-06,
+      "loss": 187.0694,
+      "step": 15000
+    },
+    {
+      "epoch": 0.12416759730322208,
+      "grad_norm": 1119.6961669921875,
+      "learning_rate": 9.986195343170703e-06,
+      "loss": 180.5289,
+      "step": 15010
+    },
+    {
+      "epoch": 0.1242503205525913,
+      "grad_norm": 1574.2568359375,
+      "learning_rate": 9.986090413870114e-06,
+      "loss": 144.7522,
+      "step": 15020
+    },
+    {
+      "epoch": 0.12433304380196054,
+      "grad_norm": 1006.7045288085938,
+      "learning_rate": 9.985985087849193e-06,
+      "loss": 143.7221,
+      "step": 15030
+    },
+    {
+      "epoch": 0.12441576705132977,
+      "grad_norm": 1300.2181396484375,
+      "learning_rate": 9.98587936511632e-06,
+      "loss": 150.1932,
+      "step": 15040
+    },
+    {
+      "epoch": 0.12449849030069901,
+      "grad_norm": 821.7041625976562,
+      "learning_rate": 9.98577324567991e-06,
+      "loss": 139.0086,
+      "step": 15050
+    },
+    {
+      "epoch": 0.12458121355006825,
+      "grad_norm": 830.7269287109375,
+      "learning_rate": 9.985666729548404e-06,
+      "loss": 146.4651,
+      "step": 15060
+    },
+    {
+      "epoch": 0.12466393679943748,
+      "grad_norm": 1310.355224609375,
+      "learning_rate": 9.985559816730277e-06,
+      "loss": 141.5489,
+      "step": 15070
+    },
+    {
+      "epoch": 0.12474666004880672,
+      "grad_norm": 1190.0335693359375,
+      "learning_rate": 9.985452507234037e-06,
+      "loss": 144.9001,
+      "step": 15080
+    },
+    {
+      "epoch": 0.12482938329817596,
+      "grad_norm": 2714.714599609375,
+      "learning_rate": 9.98534480106822e-06,
+      "loss": 154.9118,
+      "step": 15090
+    },
+    {
+      "epoch": 0.12491210654754518,
+      "grad_norm": 792.223388671875,
+      "learning_rate": 9.985236698241396e-06,
+      "loss": 149.7406,
+      "step": 15100
+    },
+    {
+      "epoch": 0.12499482979691443,
+      "grad_norm": 1287.8345947265625,
+      "learning_rate": 9.985128198762168e-06,
+      "loss": 171.4261,
+      "step": 15110
+    },
+    {
+      "epoch": 0.12507755304628365,
+      "grad_norm": 957.4619140625,
+      "learning_rate": 9.98501930263917e-06,
+      "loss": 234.8733,
+      "step": 15120
+    },
+    {
+      "epoch": 0.1251602762956529,
+      "grad_norm": 862.0460205078125,
+      "learning_rate": 9.984910009881062e-06,
+      "loss": 112.6332,
+      "step": 15130
+    },
+    {
+      "epoch": 0.12524299954502213,
+      "grad_norm": 1100.07080078125,
+      "learning_rate": 9.984800320496542e-06,
+      "loss": 139.7673,
+      "step": 15140
+    },
+    {
+      "epoch": 0.12532572279439136,
+      "grad_norm": 1111.9737548828125,
+      "learning_rate": 9.984690234494338e-06,
+      "loss": 106.7051,
+      "step": 15150
+    },
+    {
+      "epoch": 0.12540844604376059,
+      "grad_norm": 744.8794555664062,
+      "learning_rate": 9.98457975188321e-06,
+      "loss": 142.1312,
+      "step": 15160
+    },
+    {
+      "epoch": 0.12549116929312984,
+      "grad_norm": 923.189697265625,
+      "learning_rate": 9.984468872671945e-06,
+      "loss": 139.3656,
+      "step": 15170
+    },
+    {
+      "epoch": 0.12557389254249907,
+      "grad_norm": 1322.687255859375,
+      "learning_rate": 9.984357596869369e-06,
+      "loss": 148.6495,
+      "step": 15180
+    },
+    {
+      "epoch": 0.1256566157918683,
+      "grad_norm": 882.3487548828125,
+      "learning_rate": 9.984245924484334e-06,
+      "loss": 141.7766,
+      "step": 15190
+    },
+    {
+      "epoch": 0.12573933904123755,
+      "grad_norm": 725.9840698242188,
+      "learning_rate": 9.984133855525723e-06,
+      "loss": 138.5364,
+      "step": 15200
+    },
+    {
+      "epoch": 0.12582206229060677,
+      "grad_norm": 1547.522705078125,
+      "learning_rate": 9.984021390002458e-06,
+      "loss": 136.4458,
+      "step": 15210
+    },
+    {
+      "epoch": 0.125904785539976,
+      "grad_norm": 1425.56494140625,
+      "learning_rate": 9.983908527923486e-06,
+      "loss": 222.0387,
+      "step": 15220
+    },
+    {
+      "epoch": 0.12598750878934525,
+      "grad_norm": 911.7035522460938,
+      "learning_rate": 9.983795269297782e-06,
+      "loss": 169.7902,
+      "step": 15230
+    },
+    {
+      "epoch": 0.12607023203871448,
+      "grad_norm": 1066.0264892578125,
+      "learning_rate": 9.983681614134363e-06,
+      "loss": 122.5573,
+      "step": 15240
+    },
+    {
+      "epoch": 0.1261529552880837,
+      "grad_norm": 1829.2509765625,
+      "learning_rate": 9.98356756244227e-06,
+      "loss": 154.7958,
+      "step": 15250
+    },
+    {
+      "epoch": 0.12623567853745296,
+      "grad_norm": 1402.93408203125,
+      "learning_rate": 9.983453114230575e-06,
+      "loss": 145.442,
+      "step": 15260
+    },
+    {
+      "epoch": 0.1263184017868222,
+      "grad_norm": 990.7800903320312,
+      "learning_rate": 9.98333826950839e-06,
+      "loss": 138.916,
+      "step": 15270
+    },
+    {
+      "epoch": 0.12640112503619141,
+      "grad_norm": 861.1292724609375,
+      "learning_rate": 9.983223028284847e-06,
+      "loss": 152.3527,
+      "step": 15280
+    },
+    {
+      "epoch": 0.12648384828556067,
+      "grad_norm": 887.3511962890625,
+      "learning_rate": 9.983107390569118e-06,
+      "loss": 129.7973,
+      "step": 15290
+    },
+    {
+      "epoch": 0.1265665715349299,
+      "grad_norm": 1043.2373046875,
+      "learning_rate": 9.982991356370404e-06,
+      "loss": 116.1451,
+      "step": 15300
+    },
+    {
+      "epoch": 0.12664929478429912,
+      "grad_norm": 1244.5079345703125,
+      "learning_rate": 9.982874925697937e-06,
+      "loss": 221.0664,
+      "step": 15310
+    },
+    {
+      "epoch": 0.12673201803366838,
+      "grad_norm": 1715.1995849609375,
+      "learning_rate": 9.982758098560978e-06,
+      "loss": 186.7455,
+      "step": 15320
+    },
+    {
+      "epoch": 0.1268147412830376,
+      "grad_norm": 679.9988403320312,
+      "learning_rate": 9.982640874968827e-06,
+      "loss": 171.8672,
+      "step": 15330
+    },
+    {
+      "epoch": 0.12689746453240683,
+      "grad_norm": 595.40625,
+      "learning_rate": 9.98252325493081e-06,
+      "loss": 130.3511,
+      "step": 15340
+    },
+    {
+      "epoch": 0.12698018778177605,
+      "grad_norm": 915.3275146484375,
+      "learning_rate": 9.982405238456281e-06,
+      "loss": 153.7831,
+      "step": 15350
+    },
+    {
+      "epoch": 0.1270629110311453,
+      "grad_norm": 1383.0423583984375,
+      "learning_rate": 9.982286825554636e-06,
+      "loss": 155.1486,
+      "step": 15360
+    },
+    {
+      "epoch": 0.12714563428051454,
+      "grad_norm": 1527.0670166015625,
+      "learning_rate": 9.982168016235292e-06,
+      "loss": 235.3831,
+      "step": 15370
+    },
+    {
+      "epoch": 0.12722835752988376,
+      "grad_norm": 1168.0416259765625,
+      "learning_rate": 9.982048810507706e-06,
+      "loss": 175.3166,
+      "step": 15380
+    },
+    {
+      "epoch": 0.12731108077925302,
+      "grad_norm": 1577.9107666015625,
+      "learning_rate": 9.98192920838136e-06,
+      "loss": 136.4098,
+      "step": 15390
+    },
+    {
+      "epoch": 0.12739380402862224,
+      "grad_norm": 2239.125244140625,
+      "learning_rate": 9.98180920986577e-06,
+      "loss": 162.4811,
+      "step": 15400
+    },
+    {
+      "epoch": 0.12747652727799147,
+      "grad_norm": 1140.6561279296875,
+      "learning_rate": 9.981688814970485e-06,
+      "loss": 159.3877,
+      "step": 15410
+    },
+    {
+      "epoch": 0.12755925052736072,
+      "grad_norm": 929.8948974609375,
+      "learning_rate": 9.981568023705085e-06,
+      "loss": 113.0717,
+      "step": 15420
+    },
+    {
+      "epoch": 0.12764197377672995,
+      "grad_norm": 1146.56396484375,
+      "learning_rate": 9.981446836079178e-06,
+      "loss": 121.9914,
+      "step": 15430
+    },
+    {
+      "epoch": 0.12772469702609918,
+      "grad_norm": 877.8550415039062,
+      "learning_rate": 9.981325252102408e-06,
+      "loss": 173.6141,
+      "step": 15440
+    },
+    {
+      "epoch": 0.12780742027546843,
+      "grad_norm": 969.0079956054688,
+      "learning_rate": 9.98120327178445e-06,
+      "loss": 178.5706,
+      "step": 15450
+    },
+    {
+      "epoch": 0.12789014352483766,
+      "grad_norm": 1263.2391357421875,
+      "learning_rate": 9.981080895135007e-06,
+      "loss": 180.7431,
+      "step": 15460
+    },
+    {
+      "epoch": 0.12797286677420688,
+      "grad_norm": 742.5184326171875,
+      "learning_rate": 9.980958122163818e-06,
+      "loss": 111.0224,
+      "step": 15470
+    },
+    {
+      "epoch": 0.12805559002357614,
+      "grad_norm": 1423.19873046875,
+      "learning_rate": 9.980834952880652e-06,
+      "loss": 128.3473,
+      "step": 15480
+    },
+    {
+      "epoch": 0.12813831327294536,
+      "grad_norm": 1583.2940673828125,
+      "learning_rate": 9.980711387295306e-06,
+      "loss": 149.955,
+      "step": 15490
+    },
+    {
+      "epoch": 0.1282210365223146,
+      "grad_norm": 1052.7265625,
+      "learning_rate": 9.980587425417612e-06,
+      "loss": 159.9205,
+      "step": 15500
+    },
+    {
+      "epoch": 0.12830375977168385,
+      "grad_norm": 2138.17724609375,
+      "learning_rate": 9.980463067257437e-06,
+      "loss": 169.7366,
+      "step": 15510
+    },
+    {
+      "epoch": 0.12838648302105307,
+      "grad_norm": 1006.1878662109375,
+      "learning_rate": 9.980338312824672e-06,
+      "loss": 193.1612,
+      "step": 15520
+    },
+    {
+      "epoch": 0.1284692062704223,
+      "grad_norm": 1047.7593994140625,
+      "learning_rate": 9.980213162129244e-06,
+      "loss": 175.5892,
+      "step": 15530
+    },
+    {
+      "epoch": 0.12855192951979152,
+      "grad_norm": 1267.4644775390625,
+      "learning_rate": 9.980087615181111e-06,
+      "loss": 149.4357,
+      "step": 15540
+    },
+    {
+      "epoch": 0.12863465276916078,
+      "grad_norm": 1171.0859375,
+      "learning_rate": 9.979961671990263e-06,
+      "loss": 165.6414,
+      "step": 15550
+    },
+    {
+      "epoch": 0.12871737601853,
+      "grad_norm": 911.0418701171875,
+      "learning_rate": 9.979835332566719e-06,
+      "loss": 155.2462,
+      "step": 15560
+    },
+    {
+      "epoch": 0.12880009926789923,
+      "grad_norm": 1016.1674194335938,
+      "learning_rate": 9.97970859692053e-06,
+      "loss": 142.4974,
+      "step": 15570
+    },
+    {
+      "epoch": 0.12888282251726849,
+      "grad_norm": 653.232421875,
+      "learning_rate": 9.979581465061784e-06,
+      "loss": 155.5012,
+      "step": 15580
+    },
+    {
+      "epoch": 0.1289655457666377,
+      "grad_norm": 1058.5008544921875,
+      "learning_rate": 9.979453937000594e-06,
+      "loss": 101.9423,
+      "step": 15590
+    },
+    {
+      "epoch": 0.12904826901600694,
+      "grad_norm": 820.3455200195312,
+      "learning_rate": 9.979326012747106e-06,
+      "loss": 117.5258,
+      "step": 15600
+    },
+    {
+      "epoch": 0.1291309922653762,
+      "grad_norm": 1352.7105712890625,
+      "learning_rate": 9.9791976923115e-06,
+      "loss": 99.8209,
+      "step": 15610
+    },
+    {
+      "epoch": 0.12921371551474542,
+      "grad_norm": 1026.9713134765625,
+      "learning_rate": 9.979068975703984e-06,
+      "loss": 166.7305,
+      "step": 15620
+    },
+    {
+      "epoch": 0.12929643876411465,
+      "grad_norm": 1245.904296875,
+      "learning_rate": 9.978939862934802e-06,
+      "loss": 126.3938,
+      "step": 15630
+    },
+    {
+      "epoch": 0.1293791620134839,
+      "grad_norm": 1658.7640380859375,
+      "learning_rate": 9.978810354014223e-06,
+      "loss": 135.5493,
+      "step": 15640
+    },
+    {
+      "epoch": 0.12946188526285313,
+      "grad_norm": 1517.82373046875,
+      "learning_rate": 9.978680448952556e-06,
+      "loss": 139.2036,
+      "step": 15650
+    },
+    {
+      "epoch": 0.12954460851222235,
+      "grad_norm": 1112.2469482421875,
+      "learning_rate": 9.978550147760133e-06,
+      "loss": 127.4167,
+      "step": 15660
+    },
+    {
+      "epoch": 0.1296273317615916,
+      "grad_norm": 995.6966552734375,
+      "learning_rate": 9.978419450447325e-06,
+      "loss": 128.1456,
+      "step": 15670
+    },
+    {
+      "epoch": 0.12971005501096083,
+      "grad_norm": 785.5421142578125,
+      "learning_rate": 9.978288357024527e-06,
+      "loss": 142.7447,
+      "step": 15680
+    },
+    {
+      "epoch": 0.12979277826033006,
+      "grad_norm": 3418.008544921875,
+      "learning_rate": 9.978156867502173e-06,
+      "loss": 161.2918,
+      "step": 15690
+    },
+    {
+      "epoch": 0.12987550150969931,
+      "grad_norm": 1545.32568359375,
+      "learning_rate": 9.978024981890724e-06,
+      "loss": 107.0028,
+      "step": 15700
+    },
+    {
+      "epoch": 0.12995822475906854,
+      "grad_norm": 963.7340087890625,
+      "learning_rate": 9.977892700200673e-06,
+      "loss": 140.569,
+      "step": 15710
+    },
+    {
+      "epoch": 0.13004094800843777,
+      "grad_norm": 692.7611694335938,
+      "learning_rate": 9.977760022442545e-06,
+      "loss": 110.664,
+      "step": 15720
+    },
+    {
+      "epoch": 0.130123671257807,
+      "grad_norm": 1015.7322998046875,
+      "learning_rate": 9.977626948626897e-06,
+      "loss": 158.9243,
+      "step": 15730
+    },
+    {
+      "epoch": 0.13020639450717625,
+      "grad_norm": 1334.6917724609375,
+      "learning_rate": 9.977493478764316e-06,
+      "loss": 152.3215,
+      "step": 15740
+    },
+    {
+      "epoch": 0.13028911775654548,
+      "grad_norm": 963.1575927734375,
+      "learning_rate": 9.977359612865424e-06,
+      "loss": 137.1868,
+      "step": 15750
+    },
+    {
+      "epoch": 0.1303718410059147,
+      "grad_norm": 1332.0909423828125,
+      "learning_rate": 9.97722535094087e-06,
+      "loss": 130.2153,
+      "step": 15760
+    },
+    {
+      "epoch": 0.13045456425528396,
+      "grad_norm": 864.9472045898438,
+      "learning_rate": 9.977090693001336e-06,
+      "loss": 142.6017,
+      "step": 15770
+    },
+    {
+      "epoch": 0.13053728750465318,
+      "grad_norm": 1091.3128662109375,
+      "learning_rate": 9.976955639057539e-06,
+      "loss": 126.0693,
+      "step": 15780
+    },
+    {
+      "epoch": 0.1306200107540224,
+      "grad_norm": 1137.7115478515625,
+      "learning_rate": 9.976820189120223e-06,
+      "loss": 147.4185,
+      "step": 15790
+    },
+    {
+      "epoch": 0.13070273400339166,
+      "grad_norm": 1658.2982177734375,
+      "learning_rate": 9.976684343200164e-06,
+      "loss": 135.441,
+      "step": 15800
+    },
+    {
+      "epoch": 0.1307854572527609,
+      "grad_norm": 1823.3642578125,
+      "learning_rate": 9.976548101308173e-06,
+      "loss": 138.8229,
+      "step": 15810
+    },
+    {
+      "epoch": 0.13086818050213012,
+      "grad_norm": 1511.6375732421875,
+      "learning_rate": 9.976411463455088e-06,
+      "loss": 140.3549,
+      "step": 15820
+    },
+    {
+      "epoch": 0.13095090375149937,
+      "grad_norm": 970.9559326171875,
+      "learning_rate": 9.976274429651783e-06,
+      "loss": 188.4605,
+      "step": 15830
+    },
+    {
+      "epoch": 0.1310336270008686,
+      "grad_norm": 1540.7110595703125,
+      "learning_rate": 9.976136999909156e-06,
+      "loss": 106.2589,
+      "step": 15840
+    },
+    {
+      "epoch": 0.13111635025023782,
+      "grad_norm": 829.7328491210938,
+      "learning_rate": 9.97599917423815e-06,
+      "loss": 166.2885,
+      "step": 15850
+    },
+    {
+      "epoch": 0.13119907349960708,
+      "grad_norm": 0.0,
+      "learning_rate": 9.975860952649724e-06,
+      "loss": 180.9173,
+      "step": 15860
+    },
+    {
+      "epoch": 0.1312817967489763,
+      "grad_norm": 1011.6902465820312,
+      "learning_rate": 9.975722335154876e-06,
+      "loss": 161.2201,
+      "step": 15870
+    },
+    {
+      "epoch": 0.13136451999834553,
+      "grad_norm": 1166.7960205078125,
+      "learning_rate": 9.975583321764638e-06,
+      "loss": 144.3113,
+      "step": 15880
+    },
+    {
+      "epoch": 0.13144724324771476,
+      "grad_norm": 1041.4771728515625,
+      "learning_rate": 9.975443912490073e-06,
+      "loss": 149.5042,
+      "step": 15890
+    },
+    {
+      "epoch": 0.131529966497084,
+      "grad_norm": 2316.5087890625,
+      "learning_rate": 9.975304107342268e-06,
+      "loss": 179.2303,
+      "step": 15900
+    },
+    {
+      "epoch": 0.13161268974645324,
+      "grad_norm": 692.1578369140625,
+      "learning_rate": 9.97516390633235e-06,
+      "loss": 133.4318,
+      "step": 15910
+    },
+    {
+      "epoch": 0.13169541299582246,
+      "grad_norm": 1125.5006103515625,
+      "learning_rate": 9.975023309471473e-06,
+      "loss": 156.1001,
+      "step": 15920
+    },
+    {
+      "epoch": 0.13177813624519172,
+      "grad_norm": 1064.159423828125,
+      "learning_rate": 9.974882316770823e-06,
+      "loss": 147.876,
+      "step": 15930
+    },
+    {
+      "epoch": 0.13186085949456094,
+      "grad_norm": 1456.9761962890625,
+      "learning_rate": 9.974740928241617e-06,
+      "loss": 146.098,
+      "step": 15940
+    },
+    {
+      "epoch": 0.13194358274393017,
+      "grad_norm": 1191.022705078125,
+      "learning_rate": 9.974599143895107e-06,
+      "loss": 139.6693,
+      "step": 15950
+    },
+    {
+      "epoch": 0.13202630599329943,
+      "grad_norm": 2010.4088134765625,
+      "learning_rate": 9.974456963742573e-06,
+      "loss": 152.4677,
+      "step": 15960
+    },
+    {
+      "epoch": 0.13210902924266865,
+      "grad_norm": 1077.85205078125,
+      "learning_rate": 9.97431438779533e-06,
+      "loss": 199.6097,
+      "step": 15970
+    },
+    {
+      "epoch": 0.13219175249203788,
+      "grad_norm": 975.5093994140625,
+      "learning_rate": 9.974171416064719e-06,
+      "loss": 110.029,
+      "step": 15980
+    },
+    {
+      "epoch": 0.13227447574140713,
+      "grad_norm": 1180.7437744140625,
+      "learning_rate": 9.974028048562118e-06,
+      "loss": 136.7102,
+      "step": 15990
+    },
+    {
+      "epoch": 0.13235719899077636,
+      "grad_norm": 1241.7110595703125,
+      "learning_rate": 9.973884285298932e-06,
+      "loss": 154.7749,
+      "step": 16000
     }
   ],
   "logging_steps": 10,
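
`trainer_state.json` is the `transformers` Trainer's bookkeeping file: the added block extends its `log_history` list with one record every `logging_steps` (10) steps, from step 14010 up to the new `global_step` of 16000. A short sketch of recovering the loss curve from the uploaded file (the local path is an assumption):

```python
# Hedged sketch: read the log_history shown in this diff back out of the file.
import json

with open("trainer_state.json") as f:  # hypothetical local copy
    state = json.load(f)

records = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in records]
losses = [e["loss"] for e in records]
print(f"logged every {state['logging_steps']} steps; "
      f"latest: step {steps[-1]}, loss {losses[-1]}")
```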