Training in progress, step 36000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +51 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- runs/May12_05-18-47_d23934a61095/1683870172.8612866/events.out.tfevents.1683870172.d23934a61095.1332.1 +3 -0
- runs/May12_05-18-47_d23934a61095/events.out.tfevents.1683870172.d23934a61095.1332.0 +3 -0
- training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3871543575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5c3b82f556d68ba0aa2646ebf00567eef5b67832f950bd34382ca02253c5b6f
|
3 |
size 3871543575
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d075d1abdd0908ddb355507c00f14b089f10075b2d489473b94a98fecaf5689f
|
3 |
size 1944201353
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37d470642f61bcdae12d033b4f4a72f3f8fea0d2e84d4d33f3f659ce2a3fcf82
|
3 |
size 14575
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb3d96e72039fcffe3e936751e5f9bf95804f00efdb11e53bb1f33cb88fe4634
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e71a8a305cadcbae01d25f21adfd7ec9de7ed8dfedb569d98bb82f00015de3a
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -390,11 +390,59 @@
|
|
390 |
"learning_rate": 2.4218264964642107e-06,
|
391 |
"loss": 1.7762,
|
392 |
"step": 32000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
}
|
394 |
],
|
395 |
"max_steps": 943410,
|
396 |
"num_train_epochs": 10,
|
397 |
-
"total_flos":
|
398 |
"trial_name": null,
|
399 |
"trial_params": null
|
400 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.3815944287213407,
|
5 |
+
"global_step": 36000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
390 |
"learning_rate": 2.4218264964642107e-06,
|
391 |
"loss": 1.7762,
|
392 |
"step": 32000
|
393 |
+
},
|
394 |
+
{
|
395 |
+
"epoch": 0.34,
|
396 |
+
"learning_rate": 4.985384811630502e-06,
|
397 |
+
"loss": 1.7527,
|
398 |
+
"step": 32500
|
399 |
+
},
|
400 |
+
{
|
401 |
+
"epoch": 0.35,
|
402 |
+
"learning_rate": 4.984931927529385e-06,
|
403 |
+
"loss": 1.7468,
|
404 |
+
"step": 33000
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"epoch": 0.36,
|
408 |
+
"learning_rate": 4.984472154475628e-06,
|
409 |
+
"loss": 1.7746,
|
410 |
+
"step": 33500
|
411 |
+
},
|
412 |
+
{
|
413 |
+
"epoch": 0.36,
|
414 |
+
"learning_rate": 4.984006433938366e-06,
|
415 |
+
"loss": 1.7635,
|
416 |
+
"step": 34000
|
417 |
+
},
|
418 |
+
{
|
419 |
+
"epoch": 0.37,
|
420 |
+
"learning_rate": 4.983532900593762e-06,
|
421 |
+
"loss": 1.7936,
|
422 |
+
"step": 34500
|
423 |
+
},
|
424 |
+
{
|
425 |
+
"epoch": 0.37,
|
426 |
+
"learning_rate": 4.983052482175027e-06,
|
427 |
+
"loss": 1.7431,
|
428 |
+
"step": 35000
|
429 |
+
},
|
430 |
+
{
|
431 |
+
"epoch": 0.38,
|
432 |
+
"learning_rate": 4.98256518001402e-06,
|
433 |
+
"loss": 1.7891,
|
434 |
+
"step": 35500
|
435 |
+
},
|
436 |
+
{
|
437 |
+
"epoch": 0.38,
|
438 |
+
"learning_rate": 4.982070995461684e-06,
|
439 |
+
"loss": 1.7599,
|
440 |
+
"step": 36000
|
441 |
}
|
442 |
],
|
443 |
"max_steps": 943410,
|
444 |
"num_train_epochs": 10,
|
445 |
+
"total_flos": 2.030262528054067e+16,
|
446 |
"trial_name": null,
|
447 |
"trial_params": null
|
448 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10a40bc360196fa0b3bdc9ea0da4026c4fd12bfde9668ef8d7b5cb9fa61472db
|
3 |
size 3771
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d075d1abdd0908ddb355507c00f14b089f10075b2d489473b94a98fecaf5689f
|
3 |
size 1944201353
|
runs/May12_05-18-47_d23934a61095/1683870172.8612866/events.out.tfevents.1683870172.d23934a61095.1332.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f08d36ee12feb8da0fcd916e3c5de728e5acdd91a88e33b7f2a3f8f491ee165
|
3 |
+
size 6187
|
runs/May12_05-18-47_d23934a61095/events.out.tfevents.1683870172.d23934a61095.1332.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8fbab854fc78e8a7b19039f32fa5bb184f137495a20cb4b9b310b17d0d7fdf4
|
3 |
+
size 5743
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10a40bc360196fa0b3bdc9ea0da4026c4fd12bfde9668ef8d7b5cb9fa61472db
|
3 |
size 3771
|