Training in progress, step 18000, checkpoint

- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +703 -3
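
Together these files let training resume exactly where it stopped: pytorch_model.bin holds the weights, optimizer.pt and scheduler.pt the optimizer and learning-rate state, the four rng_state_*.pth files one RNG snapshot per process (suggesting a 4-GPU run), and trainer_state.json the step counter and log history. A minimal resume sketch with the Hugging Face Trainer; the model, dataset, and save interval are placeholders, not taken from this repo:

from transformers import Trainer, TrainingArguments

# Sketch only: `model` and `train_dataset` are hypothetical stand-ins for
# whatever this run actually trains. per_device_train_batch_size matches the
# "train_batch_size": 48 recorded in trainer_state.json below.
args = TrainingArguments(
    output_dir="out",
    per_device_train_batch_size=48,
    save_steps=1000,  # assumption: checkpoints here appear every 1000 steps
)
trainer = Trainer(model=model, args=args, train_dataset=train_dataset)

# Restores weights, optimizer/scheduler state, per-process RNG snapshots,
# and the global step counter, so training continues from step 18000.
trainer.train(resume_from_checkpoint="last-checkpoint")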
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3b00afd84b6c9ce17eaf6cde875a1462d2a5f0a7c0b9c73a9b93dfa70356a2e2
 size 715030586
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4ebce57f62b5c08e94d3ef4d4c19d6f624921ff13378d5f419a1a0fc63ae8de2
 size 1032262338
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:13c1b31558f9530223d30967d940c908110b66ae87767dc8b41640c0ec2ab3ad
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:31e1f3d55bb567df3a2ebf344a0ee08608b18736ddff2de100218656482b16ab
 size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b7068584adf4719cad732133ffdff00b498545ab4f7b6d887d675a74b59641e2
 size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f43ad3e51655951e2a9c021cf9bdd46d25eb6df7a162e3fc18fe50a401173803
 size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:add33ce1c647f1ad24436fdd2c7095ade5081fad618777000690c7e187278b49
 size 1064
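
Each blob above is stored via Git LFS, so the diff only touches the three-line pointer file: the spec version, the payload's SHA-256 (oid), and its size in bytes. A downloaded file can be checked against its pointer with a few lines of Python; the expected values below are copied from the optimizer.pt pointer in this commit:

import hashlib
import os

def lfs_oid(path: str) -> str:
    """sha256 hex digest of a file, streamed in 1 MiB chunks."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected values from the last-checkpoint/optimizer.pt pointer above.
path = "last-checkpoint/optimizer.pt"
assert os.path.getsize(path) == 715030586
assert lfs_oid(path) == (
    "3b00afd84b6c9ce17eaf6cde875a1462d2a5f0a7c0b9c73a9b93dfa70356a2e2"
)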
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.035099521769015894,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 18000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11908,6 +11908,706 @@
       "learning_rate": 0.000494636149601328,
       "loss": 17.8281,
       "step": 17000
+    },
+    {
+      "epoch": 0.033169048071720025,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004946328985858733,
+      "loss": 17.885,
+      "step": 17010
+    },
+    {
+      "epoch": 0.03318854780603614,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004946296475704186,
+      "loss": 17.946,
+      "step": 17020
+    },
+    {
+      "epoch": 0.03320804754035226,
+      "grad_norm": 8.5,
+      "learning_rate": 0.000494626396554964,
+      "loss": 17.7649,
+      "step": 17030
+    },
+    {
+      "epoch": 0.03322754727466838,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004946231455395093,
+      "loss": 17.8435,
+      "step": 17040
+    },
+    {
+      "epoch": 0.0332470470089845,
+      "grad_norm": 22.375,
+      "learning_rate": 0.0004946198945240546,
+      "loss": 17.7794,
+      "step": 17050
+    },
+    {
+      "epoch": 0.03326654674330062,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004946166435085999,
+      "loss": 17.7582,
+      "step": 17060
+    },
+    {
+      "epoch": 0.033286046477616744,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004946133924931453,
+      "loss": 17.9408,
+      "step": 17070
+    },
+    {
+      "epoch": 0.033305546211932865,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004946101414776906,
+      "loss": 17.8288,
+      "step": 17080
+    },
+    {
+      "epoch": 0.03332504594624898,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004946068904622359,
+      "loss": 17.8074,
+      "step": 17090
+    },
+    {
+      "epoch": 0.0333445456805651,
+      "grad_norm": 11.5,
+      "learning_rate": 0.0004946036394467813,
+      "loss": 17.7826,
+      "step": 17100
+    },
+    {
+      "epoch": 0.03336404541488122,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004946003884313266,
+      "loss": 17.8136,
+      "step": 17110
+    },
+    {
+      "epoch": 0.03338354514919734,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004945971374158719,
+      "loss": 17.7152,
+      "step": 17120
+    },
+    {
+      "epoch": 0.033403044883513464,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004945938864004172,
+      "loss": 17.6264,
+      "step": 17130
+    },
+    {
+      "epoch": 0.033422544617829585,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004945906353849625,
+      "loss": 17.6933,
+      "step": 17140
+    },
+    {
+      "epoch": 0.0334420443521457,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004945873843695078,
+      "loss": 17.7102,
+      "step": 17150
+    },
+    {
+      "epoch": 0.03346154408646182,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004945841333540531,
+      "loss": 17.7096,
+      "step": 17160
+    },
+    {
+      "epoch": 0.03348104382077794,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004945808823385984,
+      "loss": 17.5945,
+      "step": 17170
+    },
+    {
+      "epoch": 0.03350054355509406,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004945776313231438,
+      "loss": 17.7132,
+      "step": 17180
+    },
+    {
+      "epoch": 0.03352004328941018,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004945743803076891,
+      "loss": 17.8003,
+      "step": 17190
+    },
+    {
+      "epoch": 0.033539543023726304,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004945711292922344,
+      "loss": 17.8819,
+      "step": 17200
+    },
+    {
+      "epoch": 0.033559042758042425,
+      "grad_norm": 11.8125,
+      "learning_rate": 0.0004945678782767798,
+      "loss": 17.7493,
+      "step": 17210
+    },
+    {
+      "epoch": 0.03357854249235854,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004945646272613251,
+      "loss": 17.8033,
+      "step": 17220
+    },
+    {
+      "epoch": 0.03359804222667466,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004945613762458704,
+      "loss": 17.7948,
+      "step": 17230
+    },
+    {
+      "epoch": 0.03361754196099078,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004945581252304157,
+      "loss": 17.6432,
+      "step": 17240
+    },
+    {
+      "epoch": 0.0336370416953069,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004945548742149611,
+      "loss": 17.7967,
+      "step": 17250
+    },
+    {
+      "epoch": 0.03365654142962302,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004945516231995064,
+      "loss": 17.7517,
+      "step": 17260
+    },
+    {
+      "epoch": 0.033676041163939144,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004945483721840517,
+      "loss": 17.8602,
+      "step": 17270
+    },
+    {
+      "epoch": 0.03369554089825526,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004945451211685971,
+      "loss": 17.7164,
+      "step": 17280
+    },
+    {
+      "epoch": 0.03371504063257138,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004945418701531423,
+      "loss": 17.6317,
+      "step": 17290
+    },
+    {
+      "epoch": 0.0337345403668875,
+      "grad_norm": 10.5625,
+      "learning_rate": 0.0004945386191376876,
+      "loss": 17.71,
+      "step": 17300
+    },
+    {
+      "epoch": 0.03375404010120362,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004945353681222329,
+      "loss": 17.7735,
+      "step": 17310
+    },
+    {
+      "epoch": 0.03377353983551974,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004945321171067783,
+      "loss": 17.8358,
+      "step": 17320
+    },
+    {
+      "epoch": 0.03379303956983586,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004945288660913236,
+      "loss": 17.6823,
+      "step": 17330
+    },
+    {
+      "epoch": 0.033812539304151984,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004945256150758689,
+      "loss": 17.6095,
+      "step": 17340
+    },
+    {
+      "epoch": 0.0338320390384681,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004945223640604143,
+      "loss": 17.7989,
+      "step": 17350
+    },
+    {
+      "epoch": 0.03385153877278422,
+      "grad_norm": 9.25,
+      "learning_rate": 0.0004945191130449596,
+      "loss": 17.8012,
+      "step": 17360
+    },
+    {
+      "epoch": 0.03387103850710034,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004945158620295049,
+      "loss": 17.701,
+      "step": 17370
+    },
+    {
+      "epoch": 0.03389053824141646,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004945126110140502,
+      "loss": 17.6966,
+      "step": 17380
+    },
+    {
+      "epoch": 0.03391003797573258,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004945093599985956,
+      "loss": 17.7201,
+      "step": 17390
+    },
+    {
+      "epoch": 0.0339295377100487,
+      "grad_norm": 8.0,
+      "learning_rate": 0.0004945061089831409,
+      "loss": 17.7151,
+      "step": 17400
+    },
+    {
+      "epoch": 0.03394903744436482,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004945028579676862,
+      "loss": 17.5905,
+      "step": 17410
+    },
+    {
+      "epoch": 0.03396853717868094,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004944996069522316,
+      "loss": 17.7166,
+      "step": 17420
+    },
+    {
+      "epoch": 0.03398803691299706,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004944963559367769,
+      "loss": 17.7541,
+      "step": 17430
+    },
+    {
+      "epoch": 0.03400753664731318,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004944931049213222,
+      "loss": 17.6131,
+      "step": 17440
+    },
+    {
+      "epoch": 0.0340270363816293,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004944898539058674,
+      "loss": 17.6559,
+      "step": 17450
+    },
+    {
+      "epoch": 0.03404653611594542,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004944866028904128,
+      "loss": 17.7009,
+      "step": 17460
+    },
+    {
+      "epoch": 0.03406603585026154,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004944833518749581,
+      "loss": 17.7258,
+      "step": 17470
+    },
+    {
+      "epoch": 0.03408553558457766,
+      "grad_norm": 7.75,
+      "learning_rate": 0.0004944801008595034,
+      "loss": 17.687,
+      "step": 17480
+    },
+    {
+      "epoch": 0.03410503531889378,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004944768498440487,
+      "loss": 17.6723,
+      "step": 17490
+    },
+    {
+      "epoch": 0.0341245350532099,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004944735988285941,
+      "loss": 17.6988,
+      "step": 17500
+    },
+    {
+      "epoch": 0.03414403478752602,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004944703478131394,
+      "loss": 17.8097,
+      "step": 17510
+    },
+    {
+      "epoch": 0.03416353452184214,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004944670967976847,
+      "loss": 17.7569,
+      "step": 17520
+    },
+    {
+      "epoch": 0.03418303425615826,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.0004944638457822301,
+      "loss": 17.8112,
+      "step": 17530
+    },
+    {
+      "epoch": 0.034202533990474376,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004944605947667754,
+      "loss": 17.7088,
+      "step": 17540
+    },
+    {
+      "epoch": 0.0342220337247905,
+      "grad_norm": 52.75,
+      "learning_rate": 0.0004944573437513207,
+      "loss": 17.743,
+      "step": 17550
+    },
+    {
+      "epoch": 0.03424153345910662,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.000494454092735866,
+      "loss": 17.5971,
+      "step": 17560
+    },
+    {
+      "epoch": 0.03426103319342274,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004944508417204114,
+      "loss": 17.5989,
+      "step": 17570
+    },
+    {
+      "epoch": 0.03428053292773886,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004944475907049567,
+      "loss": 17.6487,
+      "step": 17580
+    },
+    {
+      "epoch": 0.03430003266205498,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.000494444339689502,
+      "loss": 17.7165,
+      "step": 17590
+    },
+    {
+      "epoch": 0.0343195323963711,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004944410886740474,
+      "loss": 17.6039,
+      "step": 17600
+    },
+    {
+      "epoch": 0.034339032130687216,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004944378376585927,
+      "loss": 17.701,
+      "step": 17610
+    },
+    {
+      "epoch": 0.03435853186500334,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.000494434586643138,
+      "loss": 17.7048,
+      "step": 17620
+    },
+    {
+      "epoch": 0.03437803159931946,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004944313356276833,
+      "loss": 17.7427,
+      "step": 17630
+    },
+    {
+      "epoch": 0.03439753133363558,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004944280846122287,
+      "loss": 17.6594,
+      "step": 17640
+    },
+    {
+      "epoch": 0.0344170310679517,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.000494424833596774,
+      "loss": 17.7552,
+      "step": 17650
+    },
+    {
+      "epoch": 0.03443653080226782,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004944215825813193,
+      "loss": 17.6453,
+      "step": 17660
+    },
+    {
+      "epoch": 0.034456030536583936,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004944183315658647,
+      "loss": 17.8145,
+      "step": 17670
+    },
+    {
+      "epoch": 0.03447553027090006,
+      "grad_norm": 9.5,
+      "learning_rate": 0.00049441508055041,
+      "loss": 17.6461,
+      "step": 17680
+    },
+    {
+      "epoch": 0.03449503000521618,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004944118295349552,
+      "loss": 17.7198,
+      "step": 17690
+    },
+    {
+      "epoch": 0.0345145297395323,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004944085785195005,
+      "loss": 17.7119,
+      "step": 17700
+    },
+    {
+      "epoch": 0.03453402947384842,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004944053275040459,
+      "loss": 17.653,
+      "step": 17710
+    },
+    {
+      "epoch": 0.03455352920816454,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004944020764885912,
+      "loss": 17.7956,
+      "step": 17720
+    },
+    {
+      "epoch": 0.03457302894248066,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004943988254731365,
+      "loss": 17.7212,
+      "step": 17730
+    },
+    {
+      "epoch": 0.034592528676796776,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004943955744576818,
+      "loss": 17.7634,
+      "step": 17740
+    },
+    {
+      "epoch": 0.0346120284111129,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004943923234422272,
+      "loss": 17.6751,
+      "step": 17750
+    },
+    {
+      "epoch": 0.03463152814542902,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004943890724267725,
+      "loss": 17.6671,
+      "step": 17760
+    },
+    {
+      "epoch": 0.03465102787974514,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004943858214113178,
+      "loss": 17.73,
+      "step": 17770
+    },
+    {
+      "epoch": 0.03467052761406126,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004943825703958632,
+      "loss": 17.713,
+      "step": 17780
+    },
+    {
+      "epoch": 0.03469002734837738,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004943793193804085,
+      "loss": 17.6888,
+      "step": 17790
+    },
+    {
+      "epoch": 0.034709527082693495,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004943760683649538,
+      "loss": 17.6683,
+      "step": 17800
+    },
+    {
+      "epoch": 0.034729026817009616,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004943728173494991,
+      "loss": 17.6479,
+      "step": 17810
+    },
+    {
+      "epoch": 0.03474852655132574,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004943695663340445,
+      "loss": 17.7058,
+      "step": 17820
+    },
+    {
+      "epoch": 0.03476802628564186,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004943663153185898,
+      "loss": 17.6571,
+      "step": 17830
+    },
+    {
+      "epoch": 0.03478752601995798,
+      "grad_norm": 11.0625,
+      "learning_rate": 0.0004943630643031351,
+      "loss": 17.6084,
+      "step": 17840
+    },
+    {
+      "epoch": 0.0348070257542741,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004943598132876805,
+      "loss": 17.6829,
+      "step": 17850
+    },
+    {
+      "epoch": 0.03482652548859022,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0004943565622722258,
+      "loss": 17.6978,
+      "step": 17860
+    },
+    {
+      "epoch": 0.034846025222906335,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004943533112567711,
+      "loss": 17.5957,
+      "step": 17870
+    },
+    {
+      "epoch": 0.034865524957222456,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004943500602413164,
+      "loss": 17.6825,
+      "step": 17880
+    },
+    {
+      "epoch": 0.03488502469153858,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004943468092258618,
+      "loss": 17.5999,
+      "step": 17890
+    },
+    {
+      "epoch": 0.0349045244258547,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004943435582104071,
+      "loss": 17.5818,
+      "step": 17900
+    },
+    {
+      "epoch": 0.03492402416017082,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004943403071949523,
+      "loss": 17.5646,
+      "step": 17910
+    },
+    {
+      "epoch": 0.03494352389448694,
+      "grad_norm": 7.875,
+      "learning_rate": 0.0004943370561794977,
+      "loss": 17.7599,
+      "step": 17920
+    },
+    {
+      "epoch": 0.03496302362880306,
+      "grad_norm": 9.875,
+      "learning_rate": 0.000494333805164043,
+      "loss": 17.7103,
+      "step": 17930
+    },
+    {
+      "epoch": 0.034982523363119175,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004943305541485883,
+      "loss": 17.5457,
+      "step": 17940
+    },
+    {
+      "epoch": 0.035002023097435296,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004943273031331336,
+      "loss": 17.6915,
+      "step": 17950
+    },
+    {
+      "epoch": 0.03502152283175142,
+      "grad_norm": 8.5,
+      "learning_rate": 0.000494324052117679,
+      "loss": 17.5628,
+      "step": 17960
+    },
+    {
+      "epoch": 0.03504102256606754,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004943208011022243,
+      "loss": 17.7019,
+      "step": 17970
+    },
+    {
+      "epoch": 0.03506052230038366,
+      "grad_norm": 8.0,
+      "learning_rate": 0.0004943175500867696,
+      "loss": 17.7066,
+      "step": 17980
+    },
+    {
+      "epoch": 0.03508002203469978,
+      "grad_norm": 8.5,
+      "learning_rate": 0.000494314299071315,
+      "loss": 17.5608,
+      "step": 17990
+    },
+    {
+      "epoch": 0.035099521769015894,
+      "grad_norm": 7.46875,
+      "learning_rate": 0.0004943110480558603,
+      "loss": 17.5819,
+      "step": 18000
     }
   ],
   "logging_steps": 10,
@@ -11927,7 +12627,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.889326067389196e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null
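
The header values are self-consistent: 18000 steps at epoch ~0.0351 means roughly 513k optimizer steps per epoch, and with a per-device batch of 48 across what the four rng_state files suggest are 4 processes (and assuming no gradient accumulation, which this commit does not state), that puts the training set near 98 million samples. The arithmetic, as a quick sketch:

# Assumptions: world_size inferred from the four rng_state_*.pth files;
# gradient accumulation assumed to be 1. Neither is stated in this commit.
global_step = 18000
epoch = 0.035099521769015894   # from the trainer_state.json header
per_device_batch = 48          # "train_batch_size": 48
world_size = 4

steps_per_epoch = global_step / epoch                        # ~512,800
dataset_size = steps_per_epoch * per_device_batch * world_size  # ~9.8e7
print(f"~{steps_per_epoch:,.0f} steps/epoch, ~{dataset_size:,.0f} samples")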