Training in progress, step 365000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f7dc68662c82c6ffbedd49f2076ab28a2c01e48540614ce2f174a82223716ed
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0696c7f5ae19cb889783a904f338158b48f61bac33c2488b7805a376c9387ff9
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9814acc3b57dc08524e0312f095af7ed7dfbb32d1dc12c13c3e82715351d160
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb6d0a859d60adef9c8fe8ecfc6811c66d512f2c7bc8e8e2bc033710665ec541
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:889772b5bbee7f72c148bc34b08a45896a822f63a7afe8ab5db2d7f65588263c
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da9aedaff3157e59f253dc3a5b6a6100df76d1279d035f184adbb4b5eb1551ed
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8fb9b18d70589928b384e18f28370eb0d78024bdf16f6b9c2c6b57b8dcc9be8
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:910ecb4645c6ef73635557af0ae3d409a37a838e704079ef7d2edea1a467a58d
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4326,6 +4326,66 @@
|
|
4326 |
"learning_rate": 3.5428460769004264e-05,
|
4327 |
"loss": 0.3216,
|
4328 |
"step": 360000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4329 |
}
|
4330 |
],
|
4331 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.212764146688113,
|
5 |
+
"global_step": 365000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4326 |
"learning_rate": 3.5428460769004264e-05,
|
4327 |
"loss": 0.3216,
|
4328 |
"step": 360000
|
4329 |
+
},
|
4330 |
+
{
|
4331 |
+
"epoch": 6.14,
|
4332 |
+
"learning_rate": 3.525911128741588e-05,
|
4333 |
+
"loss": 0.3217,
|
4334 |
+
"step": 360500
|
4335 |
+
},
|
4336 |
+
{
|
4337 |
+
"epoch": 6.14,
|
4338 |
+
"learning_rate": 3.5090203380336435e-05,
|
4339 |
+
"loss": 0.3216,
|
4340 |
+
"step": 361000
|
4341 |
+
},
|
4342 |
+
{
|
4343 |
+
"epoch": 6.15,
|
4344 |
+
"learning_rate": 3.4921738714818835e-05,
|
4345 |
+
"loss": 0.3216,
|
4346 |
+
"step": 361500
|
4347 |
+
},
|
4348 |
+
{
|
4349 |
+
"epoch": 6.16,
|
4350 |
+
"learning_rate": 3.4753718953541264e-05,
|
4351 |
+
"loss": 0.3216,
|
4352 |
+
"step": 362000
|
4353 |
+
},
|
4354 |
+
{
|
4355 |
+
"epoch": 6.17,
|
4356 |
+
"learning_rate": 3.458614575479098e-05,
|
4357 |
+
"loss": 0.3216,
|
4358 |
+
"step": 362500
|
4359 |
+
},
|
4360 |
+
{
|
4361 |
+
"epoch": 6.18,
|
4362 |
+
"learning_rate": 3.441902077244776e-05,
|
4363 |
+
"loss": 0.3218,
|
4364 |
+
"step": 363000
|
4365 |
+
},
|
4366 |
+
{
|
4367 |
+
"epoch": 6.19,
|
4368 |
+
"learning_rate": 3.425234565596775e-05,
|
4369 |
+
"loss": 0.3216,
|
4370 |
+
"step": 363500
|
4371 |
+
},
|
4372 |
+
{
|
4373 |
+
"epoch": 6.2,
|
4374 |
+
"learning_rate": 3.4086454045879215e-05,
|
4375 |
+
"loss": 0.3217,
|
4376 |
+
"step": 364000
|
4377 |
+
},
|
4378 |
+
{
|
4379 |
+
"epoch": 6.2,
|
4380 |
+
"learning_rate": 3.39206826837805e-05,
|
4381 |
+
"loss": 0.3217,
|
4382 |
+
"step": 364500
|
4383 |
+
},
|
4384 |
+
{
|
4385 |
+
"epoch": 6.21,
|
4386 |
+
"learning_rate": 3.37553661059407e-05,
|
4387 |
+
"loss": 0.3218,
|
4388 |
+
"step": 365000
|
4389 |
}
|
4390 |
],
|
4391 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0696c7f5ae19cb889783a904f338158b48f61bac33c2488b7805a376c9387ff9
|
3 |
size 201355195
|