Upload folder using huggingface_hub
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +1402 -2
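
A commit like this one is typically produced by a single `upload_folder` call from the `huggingface_hub` client. The sketch below shows the general shape of that call; the folder path and repo id are placeholders, not values taken from this commit.

from huggingface_hub import HfApi

api = HfApi()  # picks up the token stored by `huggingface-cli login`
api.upload_folder(
    folder_path="checkpoint-18000",   # local Trainer checkpoint directory (hypothetical path)
    repo_id="user/model",             # placeholder repo id
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)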
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:56afd69625f74b85d8d8b93b018355cdbf712ebe54af7a747ee606a6bc2baa6d
 size 1909053417
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c749300134ffd13cb9d01d2be78a9ce062bcde946ce2c54b6e23cb954ef3fbbc
 size 969281034
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b35be688ac811fe8314937452146b603f1e84e2bed9a25b5b4fc4a96fa54de1c
 size 14244
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:27d7ea6c17357426b5421960fee5ce87458f766c18f2dbdb748460fc3478b779
 size 1064
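
Each of the four binary files above is stored through Git LFS, so the repository itself only tracks a small pointer holding the blob's sha256 `oid` and its byte `size`. A minimal sketch for checking a downloaded blob against the oid recorded above, assuming the file sits in the current directory:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file so large checkpoints don't have to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid copied from the scheduler.pt pointer diff above; local path is an assumption
expected = "27d7ea6c17357426b5421960fee5ce87458f766c18f2dbdb748460fc3478b779"
assert sha256_of("scheduler.pt") == expected, "blob does not match its LFS pointer"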
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.1489018488646234,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 18000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11207,6 +11207,1406 @@
       "learning_rate": 9.973884285298932e-06,
       "loss": 154.7749,
       "step": 16000
+    },
+    {"epoch": 0.13243992224014559, "grad_norm": 1181.715576171875, "learning_rate": 9.9737401262866e-06, "loss": 163.9049, "step": 16010},
+    {"epoch": 0.13252264548951484, "grad_norm": 1441.7060546875, "learning_rate": 9.973595571536593e-06, "loss": 131.6654, "step": 16020},
+    {"epoch": 0.13260536873888407, "grad_norm": 1810.145751953125, "learning_rate": 9.973450621060412e-06, "loss": 155.4361, "step": 16030},
+    {"epoch": 0.1326880919882533, "grad_norm": 1024.084716796875, "learning_rate": 9.97330527486959e-06, "loss": 130.6234, "step": 16040},
+    {"epoch": 0.13277081523762255, "grad_norm": 1294.561279296875, "learning_rate": 9.973159532975691e-06, "loss": 122.6079, "step": 16050},
+    {"epoch": 0.13285353848699177, "grad_norm": 1282.573486328125, "learning_rate": 9.973013395390314e-06, "loss": 173.6021, "step": 16060},
+    {"epoch": 0.132936261736361, "grad_norm": 1436.6795654296875, "learning_rate": 9.972866862125083e-06, "loss": 201.6667, "step": 16070},
+    {"epoch": 0.13301898498573023, "grad_norm": 880.5997924804688, "learning_rate": 9.972719933191657e-06, "loss": 121.1312, "step": 16080},
+    {"epoch": 0.13310170823509948, "grad_norm": 720.5911254882812, "learning_rate": 9.97257260860173e-06, "loss": 117.1484, "step": 16090},
+    {"epoch": 0.1331844314844687, "grad_norm": 1505.3927001953125, "learning_rate": 9.972424888367019e-06, "loss": 146.7309, "step": 16100},
+    {"epoch": 0.13326715473383793, "grad_norm": 958.6402587890625, "learning_rate": 9.972276772499281e-06, "loss": 156.9766, "step": 16110},
+    {"epoch": 0.1333498779832072, "grad_norm": 877.50244140625, "learning_rate": 9.9721282610103e-06, "loss": 191.0899, "step": 16120},
+    {"epoch": 0.13343260123257641, "grad_norm": 1021.2138671875, "learning_rate": 9.971979353911891e-06, "loss": 133.9165, "step": 16130},
+    {"epoch": 0.13351532448194564, "grad_norm": 847.0870971679688, "learning_rate": 9.971830051215905e-06, "loss": 101.3374, "step": 16140},
+    {"epoch": 0.1335980477313149, "grad_norm": 2785.597412109375, "learning_rate": 9.97168035293422e-06, "loss": 267.7292, "step": 16150},
+    {"epoch": 0.13368077098068412, "grad_norm": 801.3421020507812, "learning_rate": 9.971530259078743e-06, "loss": 111.4734, "step": 16160},
+    {"epoch": 0.13376349423005335, "grad_norm": 768.2542114257812, "learning_rate": 9.971379769661422e-06, "loss": 149.4196, "step": 16170},
+    {"epoch": 0.1338462174794226, "grad_norm": 893.0291748046875, "learning_rate": 9.971228884694228e-06, "loss": 122.37, "step": 16180},
+    {"epoch": 0.13392894072879183, "grad_norm": 1295.072509765625, "learning_rate": 9.971077604189166e-06, "loss": 156.3286, "step": 16190},
+    {"epoch": 0.13401166397816106, "grad_norm": 998.2085571289062, "learning_rate": 9.970925928158275e-06, "loss": 122.403, "step": 16200},
+    {"epoch": 0.1340943872275303, "grad_norm": 701.3370361328125, "learning_rate": 9.970773856613617e-06, "loss": 140.6802, "step": 16210},
+    {"epoch": 0.13417711047689954, "grad_norm": 971.6983032226562, "learning_rate": 9.970621389567301e-06, "loss": 178.1052, "step": 16220},
+    {"epoch": 0.13425983372626876, "grad_norm": 2665.119384765625, "learning_rate": 9.97046852703145e-06, "loss": 138.6044, "step": 16230},
+    {"epoch": 0.13434255697563802, "grad_norm": 2127.31884765625, "learning_rate": 9.970315269018231e-06, "loss": 157.2493, "step": 16240},
+    {"epoch": 0.13442528022500724, "grad_norm": 1778.2391357421875, "learning_rate": 9.970161615539837e-06, "loss": 134.0471, "step": 16250},
+    {"epoch": 0.13450800347437647, "grad_norm": 993.4716796875, "learning_rate": 9.970007566608492e-06, "loss": 146.2506, "step": 16260},
+    {"epoch": 0.1345907267237457, "grad_norm": 798.2664184570312, "learning_rate": 9.969853122236455e-06, "loss": 114.1296, "step": 16270},
+    {"epoch": 0.13467344997311495, "grad_norm": 703.0869750976562, "learning_rate": 9.969698282436013e-06, "loss": 120.5299, "step": 16280},
+    {"epoch": 0.13475617322248418, "grad_norm": 1201.6317138671875, "learning_rate": 9.969543047219487e-06, "loss": 125.8007, "step": 16290},
+    {"epoch": 0.1348388964718534, "grad_norm": 1785.0177001953125, "learning_rate": 9.969387416599227e-06, "loss": 144.5029, "step": 16300},
+    {"epoch": 0.13492161972122266, "grad_norm": 1228.9619140625, "learning_rate": 9.969231390587618e-06, "loss": 164.9693, "step": 16310},
+    {"epoch": 0.13500434297059188, "grad_norm": 864.3604736328125, "learning_rate": 9.969074969197072e-06, "loss": 168.7043, "step": 16320},
+    {"epoch": 0.1350870662199611, "grad_norm": 1214.023681640625, "learning_rate": 9.968918152440036e-06, "loss": 172.751, "step": 16330},
+    {"epoch": 0.13516978946933036, "grad_norm": 928.501220703125, "learning_rate": 9.968760940328987e-06, "loss": 131.5311, "step": 16340},
+    {"epoch": 0.1352525127186996, "grad_norm": 510.1147155761719, "learning_rate": 9.968603332876435e-06, "loss": 171.1721, "step": 16350},
+    {"epoch": 0.13533523596806882, "grad_norm": 1110.3807373046875, "learning_rate": 9.968445330094915e-06, "loss": 169.255, "step": 16360},
+    {"epoch": 0.13541795921743807, "grad_norm": 1672.8614501953125, "learning_rate": 9.968286931997004e-06, "loss": 112.5926, "step": 16370},
+    {"epoch": 0.1355006824668073, "grad_norm": 1014.0128784179688, "learning_rate": 9.968128138595304e-06, "loss": 100.9882, "step": 16380},
+    {"epoch": 0.13558340571617652, "grad_norm": 1446.147216796875, "learning_rate": 9.967968949902448e-06, "loss": 185.0402, "step": 16390},
+    {"epoch": 0.13566612896554578, "grad_norm": 753.0343627929688, "learning_rate": 9.967809365931102e-06, "loss": 148.759, "step": 16400},
+    {"epoch": 0.135748852214915, "grad_norm": 909.8871459960938, "learning_rate": 9.967649386693964e-06, "loss": 123.6662, "step": 16410},
+    {"epoch": 0.13583157546428423, "grad_norm": 1223.7244873046875, "learning_rate": 9.967489012203765e-06, "loss": 132.6178, "step": 16420},
+    {"epoch": 0.13591429871365346, "grad_norm": 1106.0858154296875, "learning_rate": 9.967328242473261e-06, "loss": 146.9553, "step": 16430},
+    {"epoch": 0.1359970219630227, "grad_norm": 1789.8829345703125, "learning_rate": 9.967167077515246e-06, "loss": 133.0784, "step": 16440},
+    {"epoch": 0.13607974521239194, "grad_norm": 741.1414184570312, "learning_rate": 9.967005517342544e-06, "loss": 143.1583, "step": 16450},
+    {"epoch": 0.13616246846176117, "grad_norm": 1324.021240234375, "learning_rate": 9.966843561968005e-06, "loss": 108.1861, "step": 16460},
+    {"epoch": 0.13624519171113042, "grad_norm": 866.0011596679688, "learning_rate": 9.966681211404521e-06, "loss": 138.6324, "step": 16470},
+    {"epoch": 0.13632791496049965, "grad_norm": 520.3377685546875, "learning_rate": 9.966518465665007e-06, "loss": 113.3134, "step": 16480},
+    {"epoch": 0.13641063820986887, "grad_norm": 883.1153564453125, "learning_rate": 9.966355324762412e-06, "loss": 163.313, "step": 16490},
+    {"epoch": 0.13649336145923813, "grad_norm": 1007.1843872070312, "learning_rate": 9.966191788709716e-06, "loss": 140.2184, "step": 16500},
+    {"epoch": 0.13657608470860735, "grad_norm": 1669.2816162109375, "learning_rate": 9.966027857519931e-06, "loss": 188.2176, "step": 16510},
+    {"epoch": 0.13665880795797658, "grad_norm": 772.6116943359375, "learning_rate": 9.9658635312061e-06, "loss": 163.7544, "step": 16520},
+    {"epoch": 0.13674153120734583, "grad_norm": 706.4850463867188, "learning_rate": 9.965698809781298e-06, "loss": 121.3989, "step": 16530},
+    {"epoch": 0.13682425445671506, "grad_norm": 766.0828247070312, "learning_rate": 9.965533693258632e-06, "loss": 213.4713, "step": 16540},
+    {"epoch": 0.1369069777060843, "grad_norm": 957.917724609375, "learning_rate": 9.965368181651239e-06, "loss": 183.1273, "step": 16550},
+    {"epoch": 0.13698970095545354, "grad_norm": 696.8062744140625, "learning_rate": 9.965202274972288e-06, "loss": 112.6891, "step": 16560},
+    {"epoch": 0.13707242420482277, "grad_norm": 902.3621215820312, "learning_rate": 9.965035973234977e-06, "loss": 113.6838, "step": 16570},
+    {"epoch": 0.137155147454192, "grad_norm": 1020.390625, "learning_rate": 9.964869276452542e-06, "loss": 106.0109, "step": 16580},
+    {"epoch": 0.13723787070356125, "grad_norm": 1181.8326416015625, "learning_rate": 9.964702184638244e-06, "loss": 139.7021, "step": 16590},
+    {"epoch": 0.13732059395293048, "grad_norm": 629.9285278320312, "learning_rate": 9.964534697805377e-06, "loss": 193.1732, "step": 16600},
+    {"epoch": 0.1374033172022997, "grad_norm": 1531.7962646484375, "learning_rate": 9.96436681596727e-06, "loss": 154.7776, "step": 16610},
+    {"epoch": 0.13748604045166893, "grad_norm": 1220.1796875, "learning_rate": 9.964198539137277e-06, "loss": 191.2195, "step": 16620},
+    {"epoch": 0.13756876370103818, "grad_norm": 0.0, "learning_rate": 9.964029867328791e-06, "loss": 112.8693, "step": 16630},
+    {"epoch": 0.1376514869504074, "grad_norm": 1105.817626953125, "learning_rate": 9.963860800555228e-06, "loss": 103.0777, "step": 16640},
+    {"epoch": 0.13773421019977664, "grad_norm": 472.4584655761719, "learning_rate": 9.963691338830045e-06, "loss": 123.1952, "step": 16650},
+    {"epoch": 0.1378169334491459, "grad_norm": 990.940673828125, "learning_rate": 9.963521482166718e-06, "loss": 136.4567, "step": 16660},
+    {"epoch": 0.13789965669851512, "grad_norm": 1503.9461669921875, "learning_rate": 9.96335123057877e-06, "loss": 136.2858, "step": 16670},
+    {"epoch": 0.13798237994788434, "grad_norm": 1348.58740234375, "learning_rate": 9.963180584079741e-06, "loss": 137.5341, "step": 16680},
+    {"epoch": 0.1380651031972536, "grad_norm": 1100.0037841796875, "learning_rate": 9.963009542683214e-06, "loss": 199.9709, "step": 16690},
+    {"epoch": 0.13814782644662282, "grad_norm": 718.8609619140625, "learning_rate": 9.962838106402791e-06, "loss": 184.6782, "step": 16700},
+    {"epoch": 0.13823054969599205, "grad_norm": 865.5576782226562, "learning_rate": 9.962666275252117e-06, "loss": 104.1854, "step": 16710},
+    {"epoch": 0.1383132729453613, "grad_norm": 1161.63525390625, "learning_rate": 9.962494049244866e-06, "loss": 169.3983, "step": 16720},
+    {"epoch": 0.13839599619473053, "grad_norm": 589.0774536132812, "learning_rate": 9.962321428394735e-06, "loss": 165.776, "step": 16730},
+    {"epoch": 0.13847871944409976, "grad_norm": 2693.160888671875, "learning_rate": 9.962148412715464e-06, "loss": 154.1448, "step": 16740},
+    {"epoch": 0.138561442693469, "grad_norm": 1310.2269287109375, "learning_rate": 9.961975002220816e-06, "loss": 166.3599, "step": 16750},
+    {"epoch": 0.13864416594283824, "grad_norm": 1167.153076171875, "learning_rate": 9.96180119692459e-06, "loss": 171.0495, "step": 16760},
+    {"epoch": 0.13872688919220746, "grad_norm": 1377.29833984375, "learning_rate": 9.961626996840613e-06, "loss": 102.7167, "step": 16770},
+    {"epoch": 0.13880961244157672, "grad_norm": 977.5831909179688, "learning_rate": 9.961452401982748e-06, "loss": 136.4004, "step": 16780},
+    {"epoch": 0.13889233569094595, "grad_norm": 1010.1982421875, "learning_rate": 9.961277412364884e-06, "loss": 146.971, "step": 16790},
+    {"epoch": 0.13897505894031517, "grad_norm": 814.7576293945312, "learning_rate": 9.961102028000948e-06, "loss": 213.2676, "step": 16800},
+    {"epoch": 0.1390577821896844, "grad_norm": 881.7014770507812, "learning_rate": 9.96092624890489e-06, "loss": 91.0271, "step": 16810},
+    {"epoch": 0.13914050543905365, "grad_norm": 4899.205078125, "learning_rate": 9.960750075090698e-06, "loss": 166.8467, "step": 16820},
+    {"epoch": 0.13922322868842288, "grad_norm": 1270.030029296875, "learning_rate": 9.960573506572391e-06, "loss": 186.535, "step": 16830},
+    {"epoch": 0.1393059519377921, "grad_norm": 1338.3089599609375, "learning_rate": 9.960396543364013e-06, "loss": 192.4324, "step": 16840},
+    {"epoch": 0.13938867518716136, "grad_norm": 1512.3917236328125, "learning_rate": 9.96021918547965e-06, "loss": 124.9194, "step": 16850},
+    {"epoch": 0.13947139843653059, "grad_norm": 1637.7535400390625, "learning_rate": 9.96004143293341e-06, "loss": 131.2566, "step": 16860},
+    {"epoch": 0.1395541216858998, "grad_norm": 1564.211669921875, "learning_rate": 9.959863285739436e-06, "loss": 124.8255, "step": 16870},
+    {"epoch": 0.13963684493526907, "grad_norm": 720.8834228515625, "learning_rate": 9.959684743911904e-06, "loss": 140.7759, "step": 16880},
+    {"epoch": 0.1397195681846383, "grad_norm": 796.6300659179688, "learning_rate": 9.959505807465018e-06, "loss": 120.1176, "step": 16890},
+    {"epoch": 0.13980229143400752, "grad_norm": 1232.4276123046875, "learning_rate": 9.959326476413016e-06, "loss": 130.2664, "step": 16900},
+    {"epoch": 0.13988501468337677, "grad_norm": 457.3919677734375, "learning_rate": 9.959146750770167e-06, "loss": 124.8512, "step": 16910},
+    {"epoch": 0.139967737932746, "grad_norm": 708.2092895507812, "learning_rate": 9.95896663055077e-06, "loss": 120.5444, "step": 16920},
+    {"epoch": 0.14005046118211523, "grad_norm": 995.7003784179688, "learning_rate": 9.958786115769157e-06, "loss": 114.9213, "step": 16930},
+    {"epoch": 0.14013318443148448, "grad_norm": 1515.4827880859375, "learning_rate": 9.958605206439692e-06, "loss": 146.7894, "step": 16940},
+    {"epoch": 0.1402159076808537, "grad_norm": 814.6317138671875, "learning_rate": 9.958423902576764e-06, "loss": 99.1024, "step": 16950},
+    {"epoch": 0.14029863093022293, "grad_norm": 760.5602416992188, "learning_rate": 9.958242204194804e-06, "loss": 160.827, "step": 16960},
+    {"epoch": 0.1403813541795922, "grad_norm": 738.33349609375, "learning_rate": 9.958060111308267e-06, "loss": 136.0457, "step": 16970},
+    {"epoch": 0.14046407742896141, "grad_norm": 1149.28857421875, "learning_rate": 9.957877623931642e-06, "loss": 151.1577, "step": 16980},
+    {"epoch": 0.14054680067833064, "grad_norm": 1362.2108154296875, "learning_rate": 9.95769474207945e-06, "loss": 173.5694, "step": 16990},
+    {"epoch": 0.14062952392769987, "grad_norm": 1314.1846923828125, "learning_rate": 9.957511465766236e-06, "loss": 169.4035, "step": 17000},
+    {"epoch": 0.14071224717706912, "grad_norm": 1065.3922119140625, "learning_rate": 9.957327795006589e-06, "loss": 169.1779, "step": 17010},
+    {"epoch": 0.14079497042643835, "grad_norm": 1382.638427734375, "learning_rate": 9.95714372981512e-06, "loss": 145.6161, "step": 17020},
+    {"epoch": 0.14087769367580757, "grad_norm": 1485.4481201171875, "learning_rate": 9.956959270206474e-06, "loss": 131.7884, "step": 17030},
+    {"epoch": 0.14096041692517683, "grad_norm": 901.7747192382812, "learning_rate": 9.956774416195329e-06, "loss": 129.2612, "step": 17040},
+    {"epoch": 0.14104314017454606, "grad_norm": 1346.950439453125, "learning_rate": 9.956589167796392e-06, "loss": 108.1172, "step": 17050},
+    {"epoch": 0.14112586342391528, "grad_norm": 857.8418579101562, "learning_rate": 9.956403525024402e-06, "loss": 132.697, "step": 17060},
+    {"epoch": 0.14120858667328454, "grad_norm": 1938.1868896484375, "learning_rate": 9.956217487894131e-06, "loss": 165.6452, "step": 17070},
+    {"epoch": 0.14129130992265376, "grad_norm": 749.3518676757812, "learning_rate": 9.95603105642038e-06, "loss": 212.4321, "step": 17080},
+    {"epoch": 0.141374033172023, "grad_norm": 709.408447265625, "learning_rate": 9.955844230617985e-06, "loss": 156.41, "step": 17090},
+    {"epoch": 0.14145675642139224, "grad_norm": 1008.6261596679688, "learning_rate": 9.955657010501807e-06, "loss": 118.0272, "step": 17100},
+    {"epoch": 0.14153947967076147, "grad_norm": 828.895751953125, "learning_rate": 9.955469396086743e-06, "loss": 138.8411, "step": 17110},
+    {"epoch": 0.1416222029201307, "grad_norm": 1362.32421875, "learning_rate": 9.955281387387724e-06, "loss": 145.7589, "step": 17120},
+    {"epoch": 0.14170492616949995, "grad_norm": 1597.079345703125, "learning_rate": 9.955092984419705e-06, "loss": 170.475, "step": 17130},
+    {"epoch": 0.14178764941886918, "grad_norm": 1059.4306640625, "learning_rate": 9.954904187197679e-06, "loss": 158.0434, "step": 17140},
+    {"epoch": 0.1418703726682384, "grad_norm": 694.0506591796875, "learning_rate": 9.954714995736667e-06, "loss": 142.6755, "step": 17150},
+    {"epoch": 0.14195309591760763, "grad_norm": 1392.7862548828125, "learning_rate": 9.95452541005172e-06, "loss": 192.9698, "step": 17160},
+    {"epoch": 0.14203581916697688, "grad_norm": 1239.712646484375, "learning_rate": 9.954335430157926e-06, "loss": 126.2119, "step": 17170},
+    {"epoch": 0.1421185424163461, "grad_norm": 949.230712890625, "learning_rate": 9.9541450560704e-06, "loss": 76.8772, "step": 17180},
+    {"epoch": 0.14220126566571534, "grad_norm": 1190.4364013671875, "learning_rate": 9.953954287804286e-06, "loss": 156.7768, "step": 17190},
+    {"epoch": 0.1422839889150846, "grad_norm": 1422.4742431640625, "learning_rate": 9.953763125374767e-06, "loss": 107.7513, "step": 17200},
+    {"epoch": 0.14236671216445382, "grad_norm": 1076.408935546875, "learning_rate": 9.953571568797049e-06, "loss": 136.0641, "step": 17210},
+    {"epoch": 0.14244943541382304, "grad_norm": 930.828125, "learning_rate": 9.953379618086377e-06, "loss": 143.9599, "step": 17220},
+    {"epoch": 0.1425321586631923, "grad_norm": 1367.8873291015625, "learning_rate": 9.95318727325802e-06, "loss": 128.7768, "step": 17230},
+    {"epoch": 0.14261488191256153, "grad_norm": 1150.171875, "learning_rate": 9.952994534327283e-06, "loss": 124.427, "step": 17240},
+    {"epoch": 0.14269760516193075, "grad_norm": 821.237548828125, "learning_rate": 9.952801401309504e-06, "loss": 137.096, "step": 17250},
+    {"epoch": 0.1427803284113, "grad_norm": 1357.8616943359375, "learning_rate": 9.952607874220048e-06, "loss": 201.047, "step": 17260},
+    {"epoch": 0.14286305166066923, "grad_norm": 1452.91650390625, "learning_rate": 9.952413953074312e-06, "loss": 199.8793, "step": 17270},
+    {"epoch": 0.14294577491003846, "grad_norm": 965.8828125, "learning_rate": 9.952219637887725e-06, "loss": 129.7407, "step": 17280},
+    {"epoch": 0.1430284981594077, "grad_norm": 1721.4344482421875, "learning_rate": 9.952024928675752e-06, "loss": 177.8543, "step": 17290},
+    {"epoch": 0.14311122140877694, "grad_norm": 3541.317626953125, "learning_rate": 9.951829825453881e-06, "loss": 167.7698, "step": 17300},
+    {"epoch": 0.14319394465814617, "grad_norm": 2036.2423095703125, "learning_rate": 9.951634328237635e-06, "loss": 141.8449, "step": 17310},
+    {"epoch": 0.14327666790751542, "grad_norm": 880.5416870117188, "learning_rate": 9.951438437042572e-06, "loss": 198.8033, "step": 17320},
+    {"epoch": 0.14335939115688465, "grad_norm": 807.236572265625, "learning_rate": 9.951242151884275e-06, "loss": 112.0078, "step": 17330},
+    {"epoch": 0.14344211440625387, "grad_norm": 1530.7301025390625, "learning_rate": 9.951045472778365e-06, "loss": 133.3953, "step": 17340},
+    {"epoch": 0.1435248376556231, "grad_norm": 1775.3485107421875, "learning_rate": 9.950848399740488e-06, "loss": 132.5112, "step": 17350},
+    {"epoch": 0.14360756090499235, "grad_norm": 1216.1314697265625, "learning_rate": 9.950650932786325e-06, "loss": 150.7454, "step": 17360},
+    {"epoch": 0.14369028415436158, "grad_norm": 756.1212158203125, "learning_rate": 9.95045307193159e-06, "loss": 114.4585, "step": 17370},
+    {"epoch": 0.1437730074037308, "grad_norm": 987.248779296875, "learning_rate": 9.95025481719202e-06, "loss": 140.8504, "step": 17380},
+    {"epoch": 0.14385573065310006, "grad_norm": 1126.249267578125, "learning_rate": 9.950056168583395e-06, "loss": 225.9696, "step": 17390},
+    {"epoch": 0.1439384539024693, "grad_norm": 706.3463745117188, "learning_rate": 9.949857126121519e-06, "loss": 113.696, "step": 17400},
+    {"epoch": 0.14402117715183851, "grad_norm": 892.3402099609375, "learning_rate": 9.949657689822226e-06, "loss": 162.9231, "step": 17410},
+    {"epoch": 0.14410390040120777, "grad_norm": 856.6466674804688, "learning_rate": 9.949457859701388e-06, "loss": 99.4635, "step": 17420},
+    {"epoch": 0.144186623650577, "grad_norm": 775.4996948242188, "learning_rate": 9.949257635774903e-06, "loss": 152.7363, "step": 17430},
+    {"epoch": 0.14426934689994622, "grad_norm": 842.1768798828125, "learning_rate": 9.9490570180587e-06, "loss": 85.8346, "step": 17440},
+    {"epoch": 0.14435207014931548, "grad_norm": 1798.95849609375, "learning_rate": 9.948856006568746e-06, "loss": 197.5757, "step": 17450},
+    {"epoch": 0.1444347933986847, "grad_norm": 1381.5155029296875, "learning_rate": 9.94865460132103e-06, "loss": 150.2531, "step": 17460},
+    {"epoch": 0.14451751664805393, "grad_norm": 997.7630004882812, "learning_rate": 9.948452802331578e-06, "loss": 133.1603, "step": 17470},
+    {"epoch": 0.14460023989742318, "grad_norm": 1275.1690673828125, "learning_rate": 9.948250609616449e-06, "loss": 168.5733, "step": 17480},
+    {"epoch": 0.1446829631467924, "grad_norm": 1112.8721923828125, "learning_rate": 9.948048023191728e-06, "loss": 182.301, "step": 17490},
+    {"epoch": 0.14476568639616164, "grad_norm": 950.4414672851562, "learning_rate": 9.947845043073533e-06, "loss": 149.5477, "step": 17500},
+    {"epoch": 0.1448484096455309, "grad_norm": 1122.95751953125, "learning_rate": 9.947641669278016e-06, "loss": 123.1119, "step": 17510},
+    {"epoch": 0.14493113289490012, "grad_norm": 1148.9334716796875, "learning_rate": 9.947437901821358e-06, "loss": 128.3063, "step": 17520},
+    {"epoch": 0.14501385614426934, "grad_norm": 1392.179443359375, "learning_rate": 9.947233740719772e-06, "loss": 139.3278, "step": 17530},
+    {"epoch": 0.14509657939363857, "grad_norm": 604.5231323242188, "learning_rate": 9.947029185989501e-06, "loss": 163.2896, "step": 17540},
+    {"epoch": 0.14517930264300782, "grad_norm": 1102.948486328125, "learning_rate": 9.946824237646823e-06, "loss": 153.8839, "step": 17550},
+    {"epoch": 0.14526202589237705, "grad_norm": 2167.79638671875, "learning_rate": 9.946618895708043e-06, "loss": 172.0367, "step": 17560},
+    {"epoch": 0.14534474914174628, "grad_norm": 1476.7362060546875, "learning_rate": 9.946413160189498e-06, "loss": 138.3295, "step": 17570},
+    {"epoch": 0.14542747239111553, "grad_norm": 882.1810913085938, "learning_rate": 9.946207031107562e-06, "loss": 186.2194, "step": 17580},
+    {"epoch": 0.14551019564048476, "grad_norm": 2111.673095703125, "learning_rate": 9.94600050847863e-06, "loss": 170.1872, "step": 17590},
+    {"epoch": 0.14559291888985398, "grad_norm": 1099.032958984375, "learning_rate": 9.945793592319137e-06, "loss": 128.6498, "step": 17600},
+    {"epoch": 0.14567564213922324, "grad_norm": 1059.4005126953125, "learning_rate": 9.945586282645545e-06, "loss": 134.5357, "step": 17610},
+    {"epoch": 0.14575836538859246, "grad_norm": 1566.564208984375, "learning_rate": 9.945378579474351e-06, "loss": 164.359, "step": 17620},
+    {"epoch": 0.1458410886379617, "grad_norm": 782.61279296875, "learning_rate": 9.945170482822079e-06, "loss": 106.899, "step": 17630},
+    {"epoch": 0.14592381188733095, "grad_norm": 1026.7816162109375, "learning_rate": 9.944961992705288e-06, "loss": 142.0462, "step": 17640},
+    {"epoch": 0.14600653513670017, "grad_norm": 817.039306640625, "learning_rate": 9.944753109140564e-06, "loss": 166.4367, "step": 17650},
+    {"epoch": 0.1460892583860694, "grad_norm": 856.3842163085938, "learning_rate": 9.94454383214453e-06, "loss": 131.3289, "step": 17660},
+    {"epoch": 0.14617198163543865, "grad_norm": 1656.999755859375, "learning_rate": 9.944334161733835e-06, "loss": 129.1978, "step": 17670},
+    {"epoch": 0.14625470488480788, "grad_norm": 1338.8382568359375, "learning_rate": 9.944124097925161e-06, "loss": 184.4288, "step": 17680},
+    {"epoch": 0.1463374281341771, "grad_norm": 1011.0686645507812, "learning_rate": 9.943913640735224e-06, "loss": 127.4451, "step": 17690},
+    {"epoch": 0.14642015138354633, "grad_norm": 923.1884765625, "learning_rate": 9.94370279018077e-06, "loss": 120.5529, "step": 17700},
+    {"epoch": 0.1465028746329156, "grad_norm": 1001.093505859375, "learning_rate": 9.94349154627857e-06, "loss": 129.3988, "step": 17710},
+    {"epoch": 0.1465855978822848, "grad_norm": 1354.6356201171875, "learning_rate": 9.943279909045438e-06, "loss": 122.9835, "step": 17720},
+    {"epoch": 0.14666832113165404, "grad_norm": 1260.7392578125, "learning_rate": 9.94306787849821e-06, "loss": 101.6319, "step": 17730},
+    {"epoch": 0.1467510443810233, "grad_norm": 1424.63330078125, "learning_rate": 9.942855454653755e-06, "loss": 179.1118, "step": 17740},
+    {"epoch": 0.14683376763039252, "grad_norm": 1053.8809814453125, "learning_rate": 9.942642637528977e-06, "loss": 167.5939, "step": 17750},
+    {"epoch": 0.14691649087976175, "grad_norm": 936.3515014648438, "learning_rate": 9.942429427140807e-06, "loss": 154.7948, "step": 17760},
+    {"epoch": 0.146999214129131, "grad_norm": 876.3916015625, "learning_rate": 9.942215823506211e-06, "loss": 114.5722, "step": 17770},
+    {"epoch": 0.14708193737850023, "grad_norm": 1172.0423583984375, "learning_rate": 9.942001826642184e-06, "loss": 142.9646, "step": 17780},
+    {"epoch": 0.14716466062786945, "grad_norm": 1635.97802734375, "learning_rate": 9.941787436565751e-06, "loss": 150.69, "step": 17790},
+    {"epoch": 0.1472473838772387, "grad_norm": 728.3792724609375, "learning_rate": 9.941572653293974e-06, "loss": 97.5937, "step": 17800},
+    {"epoch": 0.14733010712660793, "grad_norm": 935.0343627929688, "learning_rate": 9.941357476843938e-06, "loss": 135.0443, "step": 17810},
+    {"epoch": 0.14741283037597716, "grad_norm": 583.3887329101562, "learning_rate": 9.941141907232766e-06, "loss": 134.4311, "step": 17820},
+    {"epoch": 0.14749555362534642, "grad_norm": 1191.19677734375, "learning_rate": 9.940925944477608e-06, "loss": 129.727, "step": 17830},
+    {"epoch": 0.14757827687471564, "grad_norm": 1111.1417236328125, "learning_rate": 9.940709588595649e-06, "loss": 171.4274, "step": 17840},
+    {"epoch": 0.14766100012408487, "grad_norm": 2006.4134521484375, "learning_rate": 9.940492839604103e-06, "loss": 152.9817, "step": 17850},
+    {"epoch": 0.14774372337345412, "grad_norm": 1163.596923828125, "learning_rate": 9.940275697520216e-06, "loss": 169.9584, "step": 17860},
+    {"epoch": 0.14782644662282335, "grad_norm": 1189.015869140625, "learning_rate": 9.940058162361264e-06, "loss": 152.1794, "step": 17870},
+    {"epoch": 0.14790916987219258, "grad_norm": 998.8855590820312, "learning_rate": 9.939840234144556e-06, "loss": 129.5204, "step": 17880},
+    {"epoch": 0.1479918931215618, "grad_norm": 442.9149475097656, "learning_rate": 9.939621912887431e-06, "loss": 106.7805, "step": 17890},
+    {"epoch": 0.14807461637093106, "grad_norm": 830.00927734375, "learning_rate": 9.93940319860726e-06, "loss": 139.6457, "step": 17900},
+    {"epoch": 0.14815733962030028, "grad_norm": 1069.5220947265625, "learning_rate": 9.939184091321445e-06, "loss": 129.1493, "step": 17910},
+    {"epoch": 0.1482400628696695, "grad_norm": 1180.868896484375, "learning_rate": 9.938964591047421e-06, "loss": 108.2578, "step": 17920},
+    {"epoch": 0.14832278611903876, "grad_norm": 1095.6793212890625, "learning_rate": 9.938744697802651e-06, "loss": 145.4649, "step": 17930},
+    {"epoch": 0.148405509368408, "grad_norm": 1292.62744140625, "learning_rate": 9.938524411604631e-06, "loss": 145.161, "step": 17940},
+    {"epoch": 0.14848823261777722, "grad_norm": 1319.2213134765625, "learning_rate": 9.938303732470888e-06, "loss": 129.5037, "step": 17950},
+    {"epoch": 0.14857095586714647, "grad_norm": 697.8318481445312, "learning_rate": 9.938082660418981e-06, "loss": 103.5571, "step": 17960},
+    {"epoch": 0.1486536791165157, "grad_norm": 784.6300659179688, "learning_rate": 9.937861195466498e-06, "loss": 133.7046, "step": 17970},
+    {"epoch": 0.14873640236588492, "grad_norm": 966.1806030273438, "learning_rate": 9.937639337631064e-06, "loss": 170.2544, "step": 17980},
+    {"epoch": 0.14881912561525418, "grad_norm": 862.203857421875, "learning_rate": 9.937417086930328e-06, "loss": 129.5846, "step": 17990},
+    {"epoch": 0.1489018488646234, "grad_norm": 3391.59716796875, "learning_rate": 9.937194443381972e-06, "loss": 195.0929, "step": 18000}
   ],
   "logging_steps": 10,
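
For reference, these per-step dicts are the running training log that `transformers.Trainer` writes into trainer_state.json, under its `log_history` array (assuming the usual layout of that file). A short sketch for pulling the entries added in this commit back out of a local copy:

import json

with open("trainer_state.json") as f:  # local path is an assumption
    state = json.load(f)

# Entries added here cover steps 16010 through 18000.
recent = [e for e in state["log_history"] if 16000 < e.get("step", -1) <= 18000]
for entry in recent[-3:]:
    print(entry["step"], entry["loss"], entry.get("grad_norm"))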