Training in progress, step 535, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:173c968110406154bbd6958e0575f0041084fe35c92872803251c373142f7599
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:731ab2eb7506e7b8fba727ae9e889e4883eba19e71ccb1aee37d0543327dcf26
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f9ee5b1533ef89e38b0a831d8e4b8b744f0cbca34e3e202f7d262ca1861c412
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25f6265b720d778f2ce309335230d277e55711db968e54ca2d8c342eedbbfb57
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3733,6 +3733,41 @@
|
|
3733 |
"learning_rate": 9.613459075424034e-05,
|
3734 |
"loss": 0.8122,
|
3735 |
"step": 530
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3736 |
}
|
3737 |
],
|
3738 |
"logging_steps": 1,
|
@@ -3752,7 +3787,7 @@
|
|
3752 |
"attributes": {}
|
3753 |
}
|
3754 |
},
|
3755 |
-
"total_flos": 5.
|
3756 |
"train_batch_size": 4,
|
3757 |
"trial_name": null,
|
3758 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6923325784535749,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 535,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3733 |
"learning_rate": 9.613459075424034e-05,
|
3734 |
"loss": 0.8122,
|
3735 |
"step": 530
|
3736 |
+
},
|
3737 |
+
{
|
3738 |
+
"epoch": 0.6871562601099968,
|
3739 |
+
"grad_norm": 0.7333494424819946,
|
3740 |
+
"learning_rate": 9.611868000118452e-05,
|
3741 |
+
"loss": 0.8027,
|
3742 |
+
"step": 531
|
3743 |
+
},
|
3744 |
+
{
|
3745 |
+
"epoch": 0.6884503396958913,
|
3746 |
+
"grad_norm": 0.7772257924079895,
|
3747 |
+
"learning_rate": 9.61027378916968e-05,
|
3748 |
+
"loss": 0.8538,
|
3749 |
+
"step": 532
|
3750 |
+
},
|
3751 |
+
{
|
3752 |
+
"epoch": 0.6897444192817859,
|
3753 |
+
"grad_norm": 0.7605924606323242,
|
3754 |
+
"learning_rate": 9.60867644366163e-05,
|
3755 |
+
"loss": 0.875,
|
3756 |
+
"step": 533
|
3757 |
+
},
|
3758 |
+
{
|
3759 |
+
"epoch": 0.6910384988676803,
|
3760 |
+
"grad_norm": 0.8444223999977112,
|
3761 |
+
"learning_rate": 9.607075964680352e-05,
|
3762 |
+
"loss": 1.0179,
|
3763 |
+
"step": 534
|
3764 |
+
},
|
3765 |
+
{
|
3766 |
+
"epoch": 0.6923325784535749,
|
3767 |
+
"grad_norm": 0.7386454939842224,
|
3768 |
+
"learning_rate": 9.605472353314023e-05,
|
3769 |
+
"loss": 0.9023,
|
3770 |
+
"step": 535
|
3771 |
}
|
3772 |
],
|
3773 |
"logging_steps": 1,
|
|
|
3787 |
"attributes": {}
|
3788 |
}
|
3789 |
},
|
3790 |
+
"total_flos": 5.980963868560589e+17,
|
3791 |
"train_batch_size": 4,
|
3792 |
"trial_name": null,
|
3793 |
"trial_params": null
|