Training in progress, step 80000, checkpoint
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step80000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step80000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e22f3699bd75f326a7211a895b158072c40e5429b5f801708c34b3d15b6c07bd
 size 42002584
last-checkpoint/global_step80000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aafccbeefe22feaa9a903213e3acd94c58467fbdb6779a13a0639f3c1be0b8de
+size 251710672
last-checkpoint/global_step80000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0da991bc5b633a74a0163039e38b5a824f4901a888f1e0349b645c4d8e094161
+size 153747385
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-
+global_step80000
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ea86b3ff53ff4d06bca8825449f1ad93bb976aacdc251afa318e48a3f722d79d
 size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.
+  "epoch": 2.3885588033320397,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 80000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -9814,6 +9814,1406 @@
       "learning_rate": 0.00018000160000914293,
       "loss": 1.2261,
       "step": 70000
+    },
+    {
+      "epoch": 2.091481802167617,
+      "grad_norm": 4.837035179138184,
+      "learning_rate": 0.00017998731421322407,
+      "loss": 1.2857,
+      "step": 70050
+    },
+    {
+      "epoch": 2.0929746514197,
+      "grad_norm": 4.402126789093018,
+      "learning_rate": 0.00017997302841730526,
+      "loss": 1.2614,
+      "step": 70100
+    },
+    {
+      "epoch": 2.094467500671782,
+      "grad_norm": 3.9073615074157715,
+      "learning_rate": 0.00017995874262138642,
+      "loss": 1.2485,
+      "step": 70150
+    },
+    {
+      "epoch": 2.095960349923865,
+      "grad_norm": 5.109444618225098,
+      "learning_rate": 0.0001799444568254676,
+      "loss": 1.2674,
+      "step": 70200
+    },
+    {
+      "epoch": 2.097453199175947,
+      "grad_norm": 6.542872428894043,
+      "learning_rate": 0.00017993017102954875,
+      "loss": 1.2715,
+      "step": 70250
+    },
+    {
+      "epoch": 2.09894604842803,
+      "grad_norm": 5.090799808502197,
+      "learning_rate": 0.00017991588523362992,
+      "loss": 1.2821,
+      "step": 70300
+    },
+    {
+      "epoch": 2.100438897680112,
+      "grad_norm": 5.429018497467041,
+      "learning_rate": 0.00017990159943771108,
+      "loss": 1.2994,
+      "step": 70350
+    },
+    {
+      "epoch": 2.1019317469321948,
+      "grad_norm": 8.873945236206055,
+      "learning_rate": 0.00017988731364179225,
+      "loss": 1.2912,
+      "step": 70400
+    },
+    {
+      "epoch": 2.1034245961842775,
+      "grad_norm": 5.144947528839111,
+      "learning_rate": 0.0001798730278458734,
+      "loss": 1.2744,
+      "step": 70450
+    },
+    {
+      "epoch": 2.1049174454363597,
+      "grad_norm": 5.346638202667236,
+      "learning_rate": 0.00017985874204995458,
+      "loss": 1.311,
+      "step": 70500
+    },
+    {
+      "epoch": 2.1064102946884424,
+      "grad_norm": 5.384441375732422,
+      "learning_rate": 0.00017984445625403574,
+      "loss": 1.2662,
+      "step": 70550
+    },
+    {
+      "epoch": 2.1079031439405247,
+      "grad_norm": 4.645060062408447,
+      "learning_rate": 0.00017983017045811693,
+      "loss": 1.2366,
+      "step": 70600
+    },
+    {
+      "epoch": 2.1093959931926074,
+      "grad_norm": 4.9864349365234375,
+      "learning_rate": 0.00017981588466219807,
+      "loss": 1.2531,
+      "step": 70650
+    },
+    {
+      "epoch": 2.11088884244469,
+      "grad_norm": 4.057093143463135,
+      "learning_rate": 0.00017980159886627926,
+      "loss": 1.2477,
+      "step": 70700
+    },
+    {
+      "epoch": 2.1123816916967724,
+      "grad_norm": 4.121668815612793,
+      "learning_rate": 0.0001797873130703604,
+      "loss": 1.2533,
+      "step": 70750
+    },
+    {
+      "epoch": 2.113874540948855,
+      "grad_norm": 5.140614032745361,
+      "learning_rate": 0.0001797730272744416,
+      "loss": 1.1833,
+      "step": 70800
+    },
+    {
+      "epoch": 2.1153673902009373,
+      "grad_norm": 5.469926834106445,
+      "learning_rate": 0.00017975874147852273,
+      "loss": 1.2746,
+      "step": 70850
+    },
+    {
+      "epoch": 2.11686023945302,
+      "grad_norm": 3.0967893600463867,
+      "learning_rate": 0.00017974445568260392,
+      "loss": 1.2449,
+      "step": 70900
+    },
+    {
+      "epoch": 2.1183530887051027,
+      "grad_norm": 4.644883155822754,
+      "learning_rate": 0.00017973016988668508,
+      "loss": 1.2304,
+      "step": 70950
+    },
+    {
+      "epoch": 2.119845937957185,
+      "grad_norm": 4.095651626586914,
+      "learning_rate": 0.00017971588409076625,
+      "loss": 1.3378,
+      "step": 71000
+    },
+    {
+      "epoch": 2.1213387872092677,
+      "grad_norm": 5.656352519989014,
+      "learning_rate": 0.00017970159829484741,
+      "loss": 1.268,
+      "step": 71050
+    },
+    {
+      "epoch": 2.12283163646135,
+      "grad_norm": 5.86411190032959,
+      "learning_rate": 0.00017968731249892858,
+      "loss": 1.3146,
+      "step": 71100
+    },
+    {
+      "epoch": 2.1243244857134327,
+      "grad_norm": 4.751001358032227,
+      "learning_rate": 0.00017967302670300974,
+      "loss": 1.2861,
+      "step": 71150
+    },
+    {
+      "epoch": 2.1258173349655154,
+      "grad_norm": 4.90724515914917,
+      "learning_rate": 0.0001796587409070909,
+      "loss": 1.2307,
+      "step": 71200
+    },
+    {
+      "epoch": 2.1273101842175977,
+      "grad_norm": 4.715629577636719,
+      "learning_rate": 0.00017964445511117207,
+      "loss": 1.2602,
+      "step": 71250
+    },
+    {
+      "epoch": 2.1288030334696804,
+      "grad_norm": 3.9297828674316406,
+      "learning_rate": 0.00017963016931525324,
+      "loss": 1.2705,
+      "step": 71300
+    },
+    {
+      "epoch": 2.1302958827217626,
+      "grad_norm": 4.373486518859863,
+      "learning_rate": 0.0001796158835193344,
+      "loss": 1.264,
+      "step": 71350
+    },
+    {
+      "epoch": 2.1317887319738453,
+      "grad_norm": 5.277180194854736,
+      "learning_rate": 0.00017960159772341557,
+      "loss": 1.2703,
+      "step": 71400
+    },
+    {
+      "epoch": 2.133281581225928,
+      "grad_norm": 3.8668055534362793,
+      "learning_rate": 0.00017958731192749673,
+      "loss": 1.2655,
+      "step": 71450
+    },
+    {
+      "epoch": 2.1347744304780103,
+      "grad_norm": 4.491888046264648,
+      "learning_rate": 0.0001795730261315779,
+      "loss": 1.2659,
+      "step": 71500
+    },
+    {
+      "epoch": 2.136267279730093,
+      "grad_norm": 4.461415767669678,
+      "learning_rate": 0.00017955874033565906,
+      "loss": 1.3075,
+      "step": 71550
+    },
+    {
+      "epoch": 2.1377601289821753,
+      "grad_norm": 5.062393665313721,
+      "learning_rate": 0.00017954445453974023,
+      "loss": 1.2328,
+      "step": 71600
+    },
+    {
+      "epoch": 2.139252978234258,
+      "grad_norm": 5.79408073425293,
+      "learning_rate": 0.0001795301687438214,
+      "loss": 1.2528,
+      "step": 71650
+    },
+    {
+      "epoch": 2.1407458274863402,
+      "grad_norm": 5.496243476867676,
+      "learning_rate": 0.00017951588294790255,
+      "loss": 1.2409,
+      "step": 71700
+    },
+    {
+      "epoch": 2.142238676738423,
+      "grad_norm": 5.3767805099487305,
+      "learning_rate": 0.00017950159715198375,
+      "loss": 1.2383,
+      "step": 71750
+    },
+    {
+      "epoch": 2.1437315259905056,
+      "grad_norm": 6.0737199783325195,
+      "learning_rate": 0.00017948731135606488,
+      "loss": 1.2936,
+      "step": 71800
+    },
+    {
+      "epoch": 2.145224375242588,
+      "grad_norm": 5.363690376281738,
+      "learning_rate": 0.00017947302556014608,
+      "loss": 1.2774,
+      "step": 71850
+    },
+    {
+      "epoch": 2.1467172244946706,
+      "grad_norm": 4.2048773765563965,
+      "learning_rate": 0.0001794587397642272,
+      "loss": 1.2476,
+      "step": 71900
+    },
+    {
+      "epoch": 2.148210073746753,
+      "grad_norm": 5.775049686431885,
+      "learning_rate": 0.0001794444539683084,
+      "loss": 1.2646,
+      "step": 71950
+    },
+    {
+      "epoch": 2.1497029229988356,
+      "grad_norm": 4.7285475730896,
+      "learning_rate": 0.00017943016817238957,
+      "loss": 1.3038,
+      "step": 72000
+    },
+    {
+      "epoch": 2.1511957722509183,
+      "grad_norm": 4.310266971588135,
+      "learning_rate": 0.00017941588237647073,
+      "loss": 1.2478,
+      "step": 72050
+    },
+    {
+      "epoch": 2.1526886215030006,
+      "grad_norm": 5.618819713592529,
+      "learning_rate": 0.0001794015965805519,
+      "loss": 1.2788,
+      "step": 72100
+    },
+    {
+      "epoch": 2.1541814707550833,
+      "grad_norm": 5.624669075012207,
+      "learning_rate": 0.00017938731078463306,
+      "loss": 1.3053,
+      "step": 72150
+    },
+    {
+      "epoch": 2.1556743200071655,
+      "grad_norm": 3.442650079727173,
+      "learning_rate": 0.00017937302498871423,
+      "loss": 1.2774,
+      "step": 72200
+    },
+    {
+      "epoch": 2.1571671692592482,
+      "grad_norm": 5.232537746429443,
+      "learning_rate": 0.0001793587391927954,
+      "loss": 1.3006,
+      "step": 72250
+    },
+    {
+      "epoch": 2.1586600185113305,
+      "grad_norm": 4.750761985778809,
+      "learning_rate": 0.00017934445339687656,
+      "loss": 1.2696,
+      "step": 72300
+    },
+    {
+      "epoch": 2.160152867763413,
+      "grad_norm": 5.479654788970947,
+      "learning_rate": 0.00017933016760095772,
+      "loss": 1.2896,
+      "step": 72350
+    },
+    {
+      "epoch": 2.161645717015496,
+      "grad_norm": 4.499126434326172,
+      "learning_rate": 0.0001793158818050389,
+      "loss": 1.2962,
+      "step": 72400
+    },
+    {
+      "epoch": 2.163138566267578,
+      "grad_norm": 5.156326770782471,
+      "learning_rate": 0.00017930159600912008,
+      "loss": 1.2852,
+      "step": 72450
+    },
+    {
+      "epoch": 2.164631415519661,
+      "grad_norm": 4.520088195800781,
+      "learning_rate": 0.00017928731021320122,
+      "loss": 1.2527,
+      "step": 72500
+    },
+    {
+      "epoch": 2.166124264771743,
+      "grad_norm": 4.284939289093018,
+      "learning_rate": 0.0001792730244172824,
+      "loss": 1.3213,
+      "step": 72550
+    },
+    {
+      "epoch": 2.167617114023826,
+      "grad_norm": 4.373524188995361,
+      "learning_rate": 0.00017925873862136355,
+      "loss": 1.2853,
+      "step": 72600
+    },
+    {
+      "epoch": 2.1691099632759085,
+      "grad_norm": 3.9682133197784424,
+      "learning_rate": 0.00017924445282544474,
+      "loss": 1.2707,
+      "step": 72650
+    },
+    {
+      "epoch": 2.170602812527991,
+      "grad_norm": 5.218033313751221,
+      "learning_rate": 0.00017923016702952587,
+      "loss": 1.2492,
+      "step": 72700
+    },
+    {
+      "epoch": 2.1720956617800735,
+      "grad_norm": 4.900534152984619,
+      "learning_rate": 0.00017921588123360707,
+      "loss": 1.256,
+      "step": 72750
+    },
+    {
+      "epoch": 2.173588511032156,
+      "grad_norm": 4.519747257232666,
+      "learning_rate": 0.00017920159543768823,
+      "loss": 1.2367,
+      "step": 72800
+    },
+    {
+      "epoch": 2.1750813602842385,
+      "grad_norm": 4.465813159942627,
+      "learning_rate": 0.0001791873096417694,
+      "loss": 1.286,
+      "step": 72850
+    },
+    {
+      "epoch": 2.176574209536321,
+      "grad_norm": 6.142160892486572,
+      "learning_rate": 0.00017917302384585056,
+      "loss": 1.2959,
+      "step": 72900
+    },
+    {
+      "epoch": 2.1780670587884035,
+      "grad_norm": 3.851306200027466,
+      "learning_rate": 0.00017915873804993173,
+      "loss": 1.2511,
+      "step": 72950
+    },
+    {
+      "epoch": 2.179559908040486,
+      "grad_norm": 5.0324788093566895,
+      "learning_rate": 0.0001791444522540129,
+      "loss": 1.2652,
+      "step": 73000
+    },
+    {
+      "epoch": 2.1810527572925684,
+      "grad_norm": 4.664618492126465,
+      "learning_rate": 0.00017913016645809405,
+      "loss": 1.2639,
+      "step": 73050
+    },
+    {
+      "epoch": 2.182545606544651,
+      "grad_norm": 4.418179988861084,
+      "learning_rate": 0.00017911588066217522,
+      "loss": 1.2948,
+      "step": 73100
+    },
+    {
+      "epoch": 2.184038455796734,
+      "grad_norm": 4.895113945007324,
+      "learning_rate": 0.00017910159486625638,
+      "loss": 1.2283,
+      "step": 73150
+    },
+    {
+      "epoch": 2.185531305048816,
+      "grad_norm": 4.2365946769714355,
+      "learning_rate": 0.00017908730907033755,
+      "loss": 1.2772,
+      "step": 73200
+    },
+    {
+      "epoch": 2.187024154300899,
+      "grad_norm": 6.705420017242432,
+      "learning_rate": 0.00017907302327441874,
+      "loss": 1.273,
+      "step": 73250
+    },
+    {
+      "epoch": 2.188517003552981,
+      "grad_norm": 5.094226360321045,
+      "learning_rate": 0.00017905873747849988,
+      "loss": 1.2391,
+      "step": 73300
+    },
+    {
+      "epoch": 2.1900098528050638,
+      "grad_norm": 4.424404621124268,
+      "learning_rate": 0.00017904445168258107,
+      "loss": 1.2198,
+      "step": 73350
+    },
+    {
+      "epoch": 2.1915027020571465,
+      "grad_norm": 5.0152201652526855,
+      "learning_rate": 0.0001790301658866622,
+      "loss": 1.3167,
+      "step": 73400
+    },
+    {
+      "epoch": 2.1929955513092287,
+      "grad_norm": 4.916745662689209,
+      "learning_rate": 0.0001790158800907434,
+      "loss": 1.2482,
+      "step": 73450
+    },
+    {
+      "epoch": 2.1944884005613114,
+      "grad_norm": 4.396121978759766,
+      "learning_rate": 0.00017900159429482454,
+      "loss": 1.2425,
+      "step": 73500
+    },
+    {
+      "epoch": 2.1959812498133937,
+      "grad_norm": 4.289492607116699,
+      "learning_rate": 0.00017898730849890573,
+      "loss": 1.2647,
+      "step": 73550
+    },
+    {
+      "epoch": 2.1974740990654764,
+      "grad_norm": 6.336133003234863,
+      "learning_rate": 0.0001789730227029869,
+      "loss": 1.2513,
+      "step": 73600
+    },
+    {
+      "epoch": 2.198966948317559,
+      "grad_norm": 4.202422618865967,
+      "learning_rate": 0.00017895873690706806,
+      "loss": 1.2657,
+      "step": 73650
+    },
+    {
+      "epoch": 2.2004597975696414,
+      "grad_norm": 4.372437953948975,
+      "learning_rate": 0.00017894445111114922,
+      "loss": 1.2432,
+      "step": 73700
+    },
+    {
+      "epoch": 2.201952646821724,
+      "grad_norm": 5.008980751037598,
+      "learning_rate": 0.0001789301653152304,
+      "loss": 1.2848,
+      "step": 73750
+    },
+    {
+      "epoch": 2.2034454960738064,
+      "grad_norm": 5.251125812530518,
+      "learning_rate": 0.00017891587951931155,
+      "loss": 1.3296,
+      "step": 73800
+    },
+    {
+      "epoch": 2.204938345325889,
+      "grad_norm": 3.898746967315674,
+      "learning_rate": 0.00017890159372339272,
+      "loss": 1.2639,
+      "step": 73850
+    },
+    {
+      "epoch": 2.2064311945779713,
+      "grad_norm": 5.178562164306641,
+      "learning_rate": 0.00017888730792747388,
+      "loss": 1.3065,
+      "step": 73900
+    },
+    {
+      "epoch": 2.207924043830054,
+      "grad_norm": 5.667030334472656,
+      "learning_rate": 0.00017887302213155505,
+      "loss": 1.2954,
+      "step": 73950
+    },
+    {
+      "epoch": 2.2094168930821367,
+      "grad_norm": 7.237538814544678,
+      "learning_rate": 0.0001788587363356362,
+      "loss": 1.2558,
+      "step": 74000
+    },
+    {
+      "epoch": 2.210909742334219,
+      "grad_norm": 4.029843330383301,
+      "learning_rate": 0.00017884445053971737,
+      "loss": 1.3004,
+      "step": 74050
+    },
+    {
+      "epoch": 2.2124025915863017,
+      "grad_norm": 5.686726093292236,
+      "learning_rate": 0.00017883016474379854,
+      "loss": 1.278,
+      "step": 74100
+    },
+    {
+      "epoch": 2.213895440838384,
+      "grad_norm": 5.373737812042236,
+      "learning_rate": 0.0001788158789478797,
+      "loss": 1.3089,
+      "step": 74150
+    },
+    {
+      "epoch": 2.2153882900904667,
+      "grad_norm": 4.493228912353516,
+      "learning_rate": 0.00017880159315196087,
+      "loss": 1.2856,
+      "step": 74200
+    },
+    {
+      "epoch": 2.2168811393425494,
+      "grad_norm": 5.083597183227539,
+      "learning_rate": 0.00017878730735604203,
+      "loss": 1.2792,
+      "step": 74250
+    },
+    {
+      "epoch": 2.2183739885946316,
+      "grad_norm": 5.1655144691467285,
+      "learning_rate": 0.0001787730215601232,
+      "loss": 1.2852,
+      "step": 74300
+    },
+    {
+      "epoch": 2.2198668378467143,
+      "grad_norm": 3.2413179874420166,
+      "learning_rate": 0.00017875873576420436,
+      "loss": 1.2617,
+      "step": 74350
+    },
+    {
+      "epoch": 2.2213596870987966,
+      "grad_norm": 4.30172061920166,
+      "learning_rate": 0.00017874444996828555,
+      "loss": 1.2949,
+      "step": 74400
+    },
+    {
+      "epoch": 2.2228525363508793,
+      "grad_norm": 4.27219295501709,
+      "learning_rate": 0.0001787301641723667,
+      "loss": 1.2293,
+      "step": 74450
+    },
+    {
+      "epoch": 2.2243453856029616,
+      "grad_norm": 5.430578708648682,
+      "learning_rate": 0.00017871587837644788,
+      "loss": 1.2662,
+      "step": 74500
+    },
+    {
+      "epoch": 2.2258382348550443,
+      "grad_norm": 5.0630292892456055,
+      "learning_rate": 0.00017870159258052902,
+      "loss": 1.308,
+      "step": 74550
+    },
+    {
+      "epoch": 2.227331084107127,
+      "grad_norm": 4.326575756072998,
+      "learning_rate": 0.0001786873067846102,
+      "loss": 1.3244,
+      "step": 74600
+    },
+    {
+      "epoch": 2.2288239333592093,
+      "grad_norm": 4.2859206199646,
+      "learning_rate": 0.00017867302098869138,
+      "loss": 1.2879,
+      "step": 74650
+    },
+    {
+      "epoch": 2.230316782611292,
+      "grad_norm": 3.931136131286621,
+      "learning_rate": 0.00017865873519277254,
+      "loss": 1.3046,
+      "step": 74700
+    },
+    {
+      "epoch": 2.231809631863374,
+      "grad_norm": 3.9645185470581055,
+      "learning_rate": 0.0001786444493968537,
+      "loss": 1.2712,
+      "step": 74750
+    },
+    {
+      "epoch": 2.233302481115457,
+      "grad_norm": 5.313785076141357,
+      "learning_rate": 0.00017863016360093487,
+      "loss": 1.2514,
+      "step": 74800
+    },
+    {
+      "epoch": 2.2347953303675396,
+      "grad_norm": 4.994843482971191,
+      "learning_rate": 0.00017861587780501604,
+      "loss": 1.2803,
+      "step": 74850
+    },
+    {
+      "epoch": 2.236288179619622,
+      "grad_norm": 6.394142150878906,
+      "learning_rate": 0.0001786015920090972,
+      "loss": 1.2424,
+      "step": 74900
+    },
+    {
+      "epoch": 2.2377810288717046,
+      "grad_norm": 4.67982816696167,
+      "learning_rate": 0.00017858730621317837,
+      "loss": 1.2775,
+      "step": 74950
+    },
+    {
+      "epoch": 2.239273878123787,
+      "grad_norm": 5.697408199310303,
+      "learning_rate": 0.00017857302041725953,
+      "loss": 1.2665,
+      "step": 75000
+    },
+    {
+      "epoch": 2.2407667273758696,
+      "grad_norm": 6.522719383239746,
+      "learning_rate": 0.0001785587346213407,
+      "loss": 1.3017,
+      "step": 75050
+    },
+    {
+      "epoch": 2.2422595766279523,
+      "grad_norm": 3.7727861404418945,
+      "learning_rate": 0.00017854444882542186,
+      "loss": 1.3516,
+      "step": 75100
+    },
+    {
+      "epoch": 2.2437524258800345,
+      "grad_norm": 4.810925483703613,
+      "learning_rate": 0.00017853016302950302,
+      "loss": 1.277,
+      "step": 75150
+    },
+    {
+      "epoch": 2.2452452751321172,
+      "grad_norm": 3.6915650367736816,
+      "learning_rate": 0.00017851587723358422,
+      "loss": 1.3159,
+      "step": 75200
+    },
+    {
+      "epoch": 2.2467381243841995,
+      "grad_norm": 4.427685260772705,
+      "learning_rate": 0.00017850159143766535,
+      "loss": 1.2725,
+      "step": 75250
+    },
+    {
+      "epoch": 2.248230973636282,
+      "grad_norm": 7.222316265106201,
+      "learning_rate": 0.00017848730564174655,
+      "loss": 1.3272,
+      "step": 75300
+    },
+    {
+      "epoch": 2.249723822888365,
+      "grad_norm": 4.113325595855713,
+      "learning_rate": 0.00017847301984582768,
+      "loss": 1.2861,
+      "step": 75350
+    },
+    {
+      "epoch": 2.251216672140447,
+      "grad_norm": 3.8266818523406982,
+      "learning_rate": 0.00017845873404990887,
+      "loss": 1.2688,
+      "step": 75400
+    },
+    {
+      "epoch": 2.25270952139253,
+      "grad_norm": 5.281492710113525,
+      "learning_rate": 0.00017844444825399004,
+      "loss": 1.2743,
+      "step": 75450
+    },
+    {
+      "epoch": 2.254202370644612,
+      "grad_norm": 5.222156047821045,
+      "learning_rate": 0.0001784301624580712,
+      "loss": 1.2324,
+      "step": 75500
+    },
+    {
+      "epoch": 2.255695219896695,
+      "grad_norm": 4.961234092712402,
+      "learning_rate": 0.00017841587666215237,
+      "loss": 1.3248,
+      "step": 75550
+    },
+    {
+      "epoch": 2.2571880691487776,
+      "grad_norm": 5.501556396484375,
+      "learning_rate": 0.00017840159086623353,
+      "loss": 1.2868,
+      "step": 75600
+    },
+    {
+      "epoch": 2.25868091840086,
+      "grad_norm": 4.440338611602783,
+      "learning_rate": 0.0001783873050703147,
+      "loss": 1.237,
+      "step": 75650
+    },
+    {
+      "epoch": 2.2601737676529425,
+      "grad_norm": 7.373712539672852,
+      "learning_rate": 0.00017837301927439586,
+      "loss": 1.2931,
+      "step": 75700
+    },
+    {
+      "epoch": 2.261666616905025,
+      "grad_norm": 4.324766635894775,
+      "learning_rate": 0.00017835873347847703,
+      "loss": 1.2762,
+      "step": 75750
+    },
+    {
+      "epoch": 2.2631594661571075,
+      "grad_norm": 7.707190990447998,
+      "learning_rate": 0.0001783444476825582,
+      "loss": 1.2565,
+      "step": 75800
+    },
+    {
+      "epoch": 2.26465231540919,
+      "grad_norm": 3.9111223220825195,
+      "learning_rate": 0.00017833016188663936,
+      "loss": 1.2134,
+      "step": 75850
+    },
+    {
+      "epoch": 2.2661451646612725,
+      "grad_norm": 4.735013961791992,
+      "learning_rate": 0.00017831587609072055,
+      "loss": 1.1908,
+      "step": 75900
+    },
+    {
+      "epoch": 2.267638013913355,
+      "grad_norm": 5.743906021118164,
+      "learning_rate": 0.00017830159029480169,
+      "loss": 1.294,
+      "step": 75950
+    },
+    {
+      "epoch": 2.2691308631654374,
+      "grad_norm": 3.792663097381592,
+      "learning_rate": 0.00017828730449888288,
+      "loss": 1.3725,
+      "step": 76000
+    },
+    {
+      "epoch": 2.27062371241752,
+      "grad_norm": 4.77357292175293,
+      "learning_rate": 0.00017827301870296402,
+      "loss": 1.2511,
+      "step": 76050
+    },
+    {
+      "epoch": 2.272116561669603,
+      "grad_norm": 5.1196675300598145,
+      "learning_rate": 0.0001782587329070452,
+      "loss": 1.3099,
+      "step": 76100
+    },
+    {
+      "epoch": 2.273609410921685,
+      "grad_norm": 5.5518574714660645,
+      "learning_rate": 0.00017824444711112634,
+      "loss": 1.2841,
+      "step": 76150
+    },
+    {
+      "epoch": 2.275102260173768,
+      "grad_norm": 4.075642108917236,
+      "learning_rate": 0.00017823016131520754,
+      "loss": 1.2857,
+      "step": 76200
+    },
+    {
+      "epoch": 2.27659510942585,
+      "grad_norm": 6.330162048339844,
+      "learning_rate": 0.0001782158755192887,
+      "loss": 1.253,
+      "step": 76250
+    },
+    {
+      "epoch": 2.278087958677933,
+      "grad_norm": 3.8504278659820557,
+      "learning_rate": 0.00017820158972336987,
+      "loss": 1.3167,
+      "step": 76300
+    },
+    {
+      "epoch": 2.279580807930015,
+      "grad_norm": 5.074968338012695,
+      "learning_rate": 0.00017818730392745103,
+      "loss": 1.3109,
+      "step": 76350
+    },
+    {
+      "epoch": 2.2810736571820978,
+      "grad_norm": 4.519435405731201,
+      "learning_rate": 0.0001781730181315322,
+      "loss": 1.2721,
+      "step": 76400
+    },
+    {
+      "epoch": 2.2825665064341805,
+      "grad_norm": 5.285332202911377,
+      "learning_rate": 0.00017815873233561336,
+      "loss": 1.3091,
+      "step": 76450
+    },
+    {
+      "epoch": 2.2840593556862627,
+      "grad_norm": 6.193671226501465,
+      "learning_rate": 0.00017814444653969452,
+      "loss": 1.2676,
+      "step": 76500
+    },
+    {
+      "epoch": 2.2855522049383454,
+      "grad_norm": 4.4518961906433105,
+      "learning_rate": 0.0001781301607437757,
+      "loss": 1.2637,
+      "step": 76550
+    },
+    {
+      "epoch": 2.2870450541904277,
+      "grad_norm": 5.584630966186523,
+      "learning_rate": 0.00017811587494785685,
+      "loss": 1.2906,
+      "step": 76600
+    },
+    {
+      "epoch": 2.2885379034425104,
+      "grad_norm": 5.528042793273926,
+      "learning_rate": 0.00017810158915193802,
+      "loss": 1.2701,
+      "step": 76650
+    },
+    {
+      "epoch": 2.2900307526945927,
+      "grad_norm": 5.087693214416504,
+      "learning_rate": 0.00017808730335601918,
+      "loss": 1.33,
+      "step": 76700
+    },
+    {
+      "epoch": 2.2915236019466754,
+      "grad_norm": 4.278517723083496,
+      "learning_rate": 0.00017807301756010035,
+      "loss": 1.2854,
+      "step": 76750
+    },
+    {
+      "epoch": 2.293016451198758,
+      "grad_norm": 4.385780334472656,
+      "learning_rate": 0.0001780587317641815,
+      "loss": 1.2567,
+      "step": 76800
+    },
+    {
+      "epoch": 2.2945093004508403,
+      "grad_norm": 4.630320072174072,
+      "learning_rate": 0.00017804444596826268,
+      "loss": 1.2706,
+      "step": 76850
+    },
+    {
+      "epoch": 2.296002149702923,
+      "grad_norm": 6.519657135009766,
+      "learning_rate": 0.00017803016017234384,
+      "loss": 1.2892,
+      "step": 76900
+    },
+    {
+      "epoch": 2.2974949989550053,
+      "grad_norm": 5.163790702819824,
+      "learning_rate": 0.000178015874376425,
+      "loss": 1.3251,
+      "step": 76950
+    },
+    {
+      "epoch": 2.298987848207088,
+      "grad_norm": 4.371760368347168,
+      "learning_rate": 0.00017800158858050617,
+      "loss": 1.3014,
+      "step": 77000
+    },
+    {
+      "epoch": 2.3004806974591707,
+      "grad_norm": 4.700384616851807,
+      "learning_rate": 0.00017798730278458736,
+      "loss": 1.2648,
+      "step": 77050
+    },
+    {
+      "epoch": 2.301973546711253,
+      "grad_norm": 4.602656364440918,
+      "learning_rate": 0.0001779730169886685,
+      "loss": 1.3043,
+      "step": 77100
+    },
+    {
+      "epoch": 2.3034663959633357,
+      "grad_norm": 4.436239719390869,
+      "learning_rate": 0.0001779587311927497,
+      "loss": 1.2702,
+      "step": 77150
+    },
+    {
+      "epoch": 2.304959245215418,
+      "grad_norm": 5.805758476257324,
+      "learning_rate": 0.00017794444539683083,
+      "loss": 1.2556,
+      "step": 77200
+    },
+    {
+      "epoch": 2.3064520944675007,
+      "grad_norm": 5.682462215423584,
+      "learning_rate": 0.00017793015960091202,
+      "loss": 1.2422,
+      "step": 77250
+    },
+    {
+      "epoch": 2.3079449437195834,
+      "grad_norm": 5.270015716552734,
+      "learning_rate": 0.00017791587380499316,
+      "loss": 1.2666,
+      "step": 77300
+    },
+    {
+      "epoch": 2.3094377929716656,
+      "grad_norm": 5.771697998046875,
+      "learning_rate": 0.00017790158800907435,
+      "loss": 1.2735,
+      "step": 77350
+    },
+    {
+      "epoch": 2.3109306422237483,
+      "grad_norm": 4.319996356964111,
+      "learning_rate": 0.00017788730221315551,
+      "loss": 1.2362,
+      "step": 77400
+    },
+    {
+      "epoch": 2.3124234914758306,
+      "grad_norm": 7.646838188171387,
+      "learning_rate": 0.00017787301641723668,
+      "loss": 1.2998,
+      "step": 77450
+    },
+    {
+      "epoch": 2.3139163407279133,
+      "grad_norm": 4.850773334503174,
+      "learning_rate": 0.00017785873062131784,
+      "loss": 1.2505,
+      "step": 77500
+    },
+    {
+      "epoch": 2.315409189979996,
+      "grad_norm": 8.903724670410156,
+      "learning_rate": 0.000177844444825399,
+      "loss": 1.2483,
+      "step": 77550
+    },
+    {
+      "epoch": 2.3169020392320783,
+      "grad_norm": 4.112529754638672,
+      "learning_rate": 0.00017783015902948017,
+      "loss": 1.2622,
+      "step": 77600
+    },
+    {
+      "epoch": 2.318394888484161,
+      "grad_norm": 5.173572063446045,
+      "learning_rate": 0.00017781587323356134,
+      "loss": 1.2783,
+      "step": 77650
+    },
+    {
+      "epoch": 2.3198877377362432,
+      "grad_norm": 3.9499335289001465,
+      "learning_rate": 0.0001778015874376425,
+      "loss": 1.3203,
+      "step": 77700
+    },
+    {
+      "epoch": 2.321380586988326,
+      "grad_norm": 4.4116926193237305,
+      "learning_rate": 0.00017778730164172367,
+      "loss": 1.2819,
+      "step": 77750
+    },
+    {
+      "epoch": 2.3228734362404087,
+      "grad_norm": 5.434197902679443,
+      "learning_rate": 0.00017777301584580483,
+      "loss": 1.2591,
+      "step": 77800
+    },
+    {
+      "epoch": 2.324366285492491,
+      "grad_norm": 3.9472904205322266,
+      "learning_rate": 0.00017775873004988602,
+      "loss": 1.2609,
+      "step": 77850
+    },
+    {
+      "epoch": 2.3258591347445736,
+      "grad_norm": 4.981082439422607,
+      "learning_rate": 0.00017774444425396716,
+      "loss": 1.3558,
+      "step": 77900
+    },
+    {
+      "epoch": 2.327351983996656,
+      "grad_norm": 4.96250057220459,
+      "learning_rate": 0.00017773015845804835,
+      "loss": 1.2478,
+      "step": 77950
+    },
+    {
+      "epoch": 2.3288448332487386,
+      "grad_norm": 7.093445301055908,
+      "learning_rate": 0.0001777158726621295,
+      "loss": 1.2659,
+      "step": 78000
+    },
+    {
+      "epoch": 2.3303376825008213,
+      "grad_norm": 4.415938854217529,
+      "learning_rate": 0.00017770158686621068,
+      "loss": 1.2994,
+      "step": 78050
+    },
+    {
+      "epoch": 2.3318305317529036,
+      "grad_norm": 6.983063697814941,
+      "learning_rate": 0.00017768730107029185,
+      "loss": 1.2994,
+      "step": 78100
+    },
+    {
+      "epoch": 2.3333233810049863,
+      "grad_norm": 6.34420919418335,
+      "learning_rate": 0.000177673015274373,
+      "loss": 1.3172,
+      "step": 78150
+    },
+    {
+      "epoch": 2.3348162302570685,
+      "grad_norm": 4.156967639923096,
+      "learning_rate": 0.00017765872947845418,
+      "loss": 1.2425,
+      "step": 78200
+    },
+    {
+      "epoch": 2.3363090795091512,
+      "grad_norm": 4.001344680786133,
+      "learning_rate": 0.00017764444368253534,
+      "loss": 1.3034,
+      "step": 78250
+    },
+    {
+      "epoch": 2.337801928761234,
+      "grad_norm": 5.008638381958008,
+      "learning_rate": 0.0001776301578866165,
+      "loss": 1.2912,
+      "step": 78300
+    },
+    {
+      "epoch": 2.339294778013316,
+      "grad_norm": 3.917297840118408,
+      "learning_rate": 0.00017761587209069767,
+      "loss": 1.2807,
+      "step": 78350
+    },
+    {
+      "epoch": 2.340787627265399,
+      "grad_norm": 4.353655815124512,
+      "learning_rate": 0.00017760158629477884,
+      "loss": 1.3237,
+      "step": 78400
+    },
+    {
+      "epoch": 2.342280476517481,
+      "grad_norm": 5.5648698806762695,
+      "learning_rate": 0.00017758730049886,
+      "loss": 1.3145,
+      "step": 78450
+    },
+    {
+      "epoch": 2.343773325769564,
+      "grad_norm": 3.928065538406372,
+      "learning_rate": 0.00017757301470294116,
+      "loss": 1.297,
+      "step": 78500
+    },
+    {
+      "epoch": 2.345266175021646,
+      "grad_norm": 4.029613494873047,
+      "learning_rate": 0.00017755872890702233,
+      "loss": 1.2477,
+      "step": 78550
+    },
+    {
+      "epoch": 2.346759024273729,
+      "grad_norm": 3.9108121395111084,
+      "learning_rate": 0.0001775444431111035,
+      "loss": 1.2537,
+      "step": 78600
+    },
+    {
+      "epoch": 2.3482518735258116,
+      "grad_norm": 4.727762699127197,
+      "learning_rate": 0.00017753015731518469,
+      "loss": 1.2956,
+      "step": 78650
+    },
+    {
+      "epoch": 2.349744722777894,
+      "grad_norm": 4.287507057189941,
+      "learning_rate": 0.00017751587151926582,
+      "loss": 1.2689,
+      "step": 78700
+    },
+    {
+      "epoch": 2.3512375720299765,
+      "grad_norm": 4.162285804748535,
+      "learning_rate": 0.00017750158572334701,
+      "loss": 1.2803,
+      "step": 78750
+    },
+    {
+      "epoch": 2.352730421282059,
+      "grad_norm": 5.458852291107178,
+      "learning_rate": 0.00017748729992742815,
+      "loss": 1.2789,
+      "step": 78800
+    },
+    {
+      "epoch": 2.3542232705341415,
+      "grad_norm": 4.103569984436035,
+      "learning_rate": 0.00017747301413150934,
+      "loss": 1.3299,
+      "step": 78850
+    },
+    {
+      "epoch": 2.3557161197862238,
+      "grad_norm": 4.286417007446289,
+      "learning_rate": 0.0001774587283355905,
+      "loss": 1.3333,
+      "step": 78900
+    },
+    {
+      "epoch": 2.3572089690383065,
+      "grad_norm": 4.811729431152344,
+      "learning_rate": 0.00017744444253967167,
+      "loss": 1.3414,
+      "step": 78950
+    },
+    {
+      "epoch": 2.358701818290389,
+      "grad_norm": 3.960171699523926,
+      "learning_rate": 0.00017743015674375284,
+      "loss": 1.2661,
+      "step": 79000
+    },
+    {
+      "epoch": 2.3601946675424714,
+      "grad_norm": 5.161068439483643,
+      "learning_rate": 0.000177415870947834,
+      "loss": 1.2622,
+      "step": 79050
+    },
+    {
+      "epoch": 2.361687516794554,
+      "grad_norm": 5.81963586807251,
+      "learning_rate": 0.00017740158515191517,
+      "loss": 1.3336,
+      "step": 79100
+    },
+    {
+      "epoch": 2.3631803660466364,
+      "grad_norm": 5.815190315246582,
+      "learning_rate": 0.00017738729935599633,
+      "loss": 1.3078,
+      "step": 79150
+    },
+    {
+      "epoch": 2.364673215298719,
+      "grad_norm": 3.994666814804077,
+      "learning_rate": 0.0001773730135600775,
+      "loss": 1.2237,
+      "step": 79200
+    },
+    {
+      "epoch": 2.366166064550802,
+      "grad_norm": 5.232024669647217,
+      "learning_rate": 0.00017735872776415866,
+      "loss": 1.3267,
+      "step": 79250
+    },
+    {
+      "epoch": 2.367658913802884,
+      "grad_norm": 4.517995357513428,
+      "learning_rate": 0.00017734444196823983,
+      "loss": 1.3168,
+      "step": 79300
+    },
+    {
+      "epoch": 2.3691517630549668,
+      "grad_norm": 4.49736213684082,
+      "learning_rate": 0.000177330156172321,
+      "loss": 1.3403,
+      "step": 79350
+    },
+    {
+      "epoch": 2.370644612307049,
+      "grad_norm": 5.295841693878174,
+      "learning_rate": 0.00017731587037640216,
+      "loss": 1.2639,
+      "step": 79400
+    },
+    {
+      "epoch": 2.3721374615591317,
+      "grad_norm": 6.010612487792969,
+      "learning_rate": 0.00017730158458048332,
+      "loss": 1.273,
+      "step": 79450
+    },
+    {
+      "epoch": 2.3736303108112144,
+      "grad_norm": 4.014612674713135,
+      "learning_rate": 0.00017728729878456448,
+      "loss": 1.3027,
+      "step": 79500
+    },
+    {
+      "epoch": 2.3751231600632967,
+      "grad_norm": 5.472930908203125,
+      "learning_rate": 0.00017727301298864565,
+      "loss": 1.2927,
+      "step": 79550
+    },
+    {
+      "epoch": 2.3766160093153794,
+      "grad_norm": 5.090315341949463,
+      "learning_rate": 0.00017725872719272681,
+      "loss": 1.2994,
+      "step": 79600
+    },
+    {
+      "epoch": 2.3781088585674617,
+      "grad_norm": 4.583268165588379,
+      "learning_rate": 0.00017724444139680798,
+      "loss": 1.3249,
+      "step": 79650
+    },
+    {
+      "epoch": 2.3796017078195444,
+      "grad_norm": 5.765147686004639,
+      "learning_rate": 0.00017723015560088917,
+      "loss": 1.2582,
+      "step": 79700
+    },
+    {
+      "epoch": 2.381094557071627,
+      "grad_norm": 4.751801490783691,
+      "learning_rate": 0.0001772158698049703,
+      "loss": 1.2497,
+      "step": 79750
+    },
+    {
+      "epoch": 2.3825874063237094,
+      "grad_norm": 6.316953182220459,
+      "learning_rate": 0.0001772015840090515,
+      "loss": 1.2949,
+      "step": 79800
+    },
+    {
+      "epoch": 2.384080255575792,
+      "grad_norm": 4.6090779304504395,
+      "learning_rate": 0.00017718729821313264,
+      "loss": 1.2922,
+      "step": 79850
+    },
+    {
+      "epoch": 2.3855731048278743,
+      "grad_norm": 5.004429340362549,
+      "learning_rate": 0.00017717301241721383,
+      "loss": 1.336,
+      "step": 79900
+    },
+    {
+      "epoch": 2.387065954079957,
+      "grad_norm": 5.7317352294921875,
+      "learning_rate": 0.00017715872662129497,
+      "loss": 1.2296,
+      "step": 79950
+    },
+    {
+      "epoch": 2.3885588033320397,
+      "grad_norm": 4.938337326049805,
+      "learning_rate": 0.00017714444082537616,
+      "loss": 1.3118,
+      "step": 80000
     }
   ],
   "logging_steps": 50,
@@ -9833,7 +11233,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 2.020639818873045e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null