Training in progress, step 36000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +2 -2
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +51 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- runs/Jun19_15-51-34_06b9084a2b92/1687191459.656514/events.out.tfevents.1687191459.06b9084a2b92.440.1 +3 -0
- runs/Jun19_15-51-34_06b9084a2b92/events.out.tfevents.1687191459.06b9084a2b92.440.0 +3 -0
- training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3871543575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec7e298fd62c1a39858803bf29d7c2e8efcbd4c5c680eb02363626cd8abe54eb
|
3 |
size 3871543575
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26ac5bf0ba950051aa0ff732cba35fab243d7da59fb32a4ab70218e8b7530bd8
|
3 |
size 1944201353
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21f8ddec2b20067022a53fc46601e8b649c82ca53bd5e133dba2ac677718e2a6
|
3 |
+
size 14639
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a239714c7b10ff40537ad8f432bd550ee4156037360613d4fe177e8a854e271e
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f18a4737538491cfcba9aa7df3f92263459c4ea1d9c125899e2aabf1d656f30e
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -390,11 +390,59 @@
|
|
390 |
"learning_rate": 3.1474289357030756e-06,
|
391 |
"loss": 1.3125,
|
392 |
"step": 32000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
}
|
394 |
],
|
395 |
"max_steps": 38148,
|
396 |
"num_train_epochs": 1,
|
397 |
-
"total_flos": 3.
|
398 |
"trial_name": null,
|
399 |
"trial_params": null
|
400 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9436830902475596,
|
5 |
+
"global_step": 36000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
390 |
"learning_rate": 3.1474289357030756e-06,
|
391 |
"loss": 1.3125,
|
392 |
"step": 32000
|
393 |
+
},
|
394 |
+
{
|
395 |
+
"epoch": 0.85,
|
396 |
+
"learning_rate": 2.666066827958255e-06,
|
397 |
+
"loss": 1.2965,
|
398 |
+
"step": 32500
|
399 |
+
},
|
400 |
+
{
|
401 |
+
"epoch": 0.87,
|
402 |
+
"learning_rate": 2.2225664154712e-06,
|
403 |
+
"loss": 1.3045,
|
404 |
+
"step": 33000
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"epoch": 0.88,
|
408 |
+
"learning_rate": 1.8176795442036071e-06,
|
409 |
+
"loss": 1.2954,
|
410 |
+
"step": 33500
|
411 |
+
},
|
412 |
+
{
|
413 |
+
"epoch": 0.89,
|
414 |
+
"learning_rate": 1.4520926003470992e-06,
|
415 |
+
"loss": 1.2965,
|
416 |
+
"step": 34000
|
417 |
+
},
|
418 |
+
{
|
419 |
+
"epoch": 0.9,
|
420 |
+
"learning_rate": 1.126425346724097e-06,
|
421 |
+
"loss": 1.2995,
|
422 |
+
"step": 34500
|
423 |
+
},
|
424 |
+
{
|
425 |
+
"epoch": 0.92,
|
426 |
+
"learning_rate": 8.417595393838212e-07,
|
427 |
+
"loss": 1.2603,
|
428 |
+
"step": 35000
|
429 |
+
},
|
430 |
+
{
|
431 |
+
"epoch": 0.93,
|
432 |
+
"learning_rate": 5.974369756604148e-07,
|
433 |
+
"loss": 1.2969,
|
434 |
+
"step": 35500
|
435 |
+
},
|
436 |
+
{
|
437 |
+
"epoch": 0.94,
|
438 |
+
"learning_rate": 3.94482960755474e-07,
|
439 |
+
"loss": 1.2839,
|
440 |
+
"step": 36000
|
441 |
}
|
442 |
],
|
443 |
"max_steps": 38148,
|
444 |
"num_train_epochs": 1,
|
445 |
+
"total_flos": 3.962708218655539e+16,
|
446 |
"trial_name": null,
|
447 |
"trial_params": null
|
448 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5122d7db340c58d7a11bdd99b8ba51f813c163ab6f5efa038b8938dd747a00c2
|
3 |
size 3771
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26ac5bf0ba950051aa0ff732cba35fab243d7da59fb32a4ab70218e8b7530bd8
|
3 |
size 1944201353
|
runs/Jun19_15-51-34_06b9084a2b92/1687191459.656514/events.out.tfevents.1687191459.06b9084a2b92.440.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a1eff574ca9f40f112e846573488c0cb2bcb234a588b96aca4cdf0f21b9476f
|
3 |
+
size 6187
|
runs/Jun19_15-51-34_06b9084a2b92/events.out.tfevents.1687191459.06b9084a2b92.440.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b49770d903b7769b229d276aca2e3682a533e09a7a130690be927f304f6794c6
|
3 |
+
size 5742
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5122d7db340c58d7a11bdd99b8ba51f813c163ab6f5efa038b8938dd747a00c2
|
3 |
size 3771
|