Training in progress, step 510, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a44a782ea638c6f7e9dc51e718ea5f60364b15256a8cf012bd12a3e32e577b7
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d81439b95ab9fdfc28944c3f45684bab1b89a36ee16c058a71c1fbf8c0e50d5
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd7132b96ed38ba9b738ad147e8a862f64c2ac7a948dbbba18a7efce0585f901
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a13a5d867c4b3340715fb328361f350e2a0ea77e86afd98eac6d64dcb61c2c63
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3558,6 +3558,41 @@
|
|
3558 |
"learning_rate": 9.652213747295906e-05,
|
3559 |
"loss": 0.8605,
|
3560 |
"step": 505
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3561 |
}
|
3562 |
],
|
3563 |
"logging_steps": 1,
|
@@ -3577,7 +3612,7 @@
|
|
3577 |
"attributes": {}
|
3578 |
}
|
3579 |
},
|
3580 |
-
"total_flos": 5.
|
3581 |
"train_batch_size": 4,
|
3582 |
"trial_name": null,
|
3583 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6599805888062116,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 510,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3558 |
"learning_rate": 9.652213747295906e-05,
|
3559 |
"loss": 0.8605,
|
3560 |
"step": 505
|
3561 |
+
},
|
3562 |
+
{
|
3563 |
+
"epoch": 0.6548042704626335,
|
3564 |
+
"grad_norm": 0.7796474695205688,
|
3565 |
+
"learning_rate": 9.650701409094018e-05,
|
3566 |
+
"loss": 0.803,
|
3567 |
+
"step": 506
|
3568 |
+
},
|
3569 |
+
{
|
3570 |
+
"epoch": 0.656098350048528,
|
3571 |
+
"grad_norm": 0.8425498008728027,
|
3572 |
+
"learning_rate": 9.649185908845818e-05,
|
3573 |
+
"loss": 0.9324,
|
3574 |
+
"step": 507
|
3575 |
+
},
|
3576 |
+
{
|
3577 |
+
"epoch": 0.6573924296344226,
|
3578 |
+
"grad_norm": 0.6857015490531921,
|
3579 |
+
"learning_rate": 9.647667247581703e-05,
|
3580 |
+
"loss": 0.8701,
|
3581 |
+
"step": 508
|
3582 |
+
},
|
3583 |
+
{
|
3584 |
+
"epoch": 0.658686509220317,
|
3585 |
+
"grad_norm": 0.7896414995193481,
|
3586 |
+
"learning_rate": 9.646145426334223e-05,
|
3587 |
+
"loss": 0.7447,
|
3588 |
+
"step": 509
|
3589 |
+
},
|
3590 |
+
{
|
3591 |
+
"epoch": 0.6599805888062116,
|
3592 |
+
"grad_norm": 0.7142564058303833,
|
3593 |
+
"learning_rate": 9.644620446138077e-05,
|
3594 |
+
"loss": 0.9221,
|
3595 |
+
"step": 510
|
3596 |
}
|
3597 |
],
|
3598 |
"logging_steps": 1,
|
|
|
3612 |
"attributes": {}
|
3613 |
}
|
3614 |
},
|
3615 |
+
"total_flos": 5.701479575637197e+17,
|
3616 |
"train_batch_size": 4,
|
3617 |
"trial_name": null,
|
3618 |
"trial_params": null
|