Training in progress, step 112, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27153960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:952ebd31f776c3dd854d46355b025759413bc68ed9e27518172d068815fc28f6
|
3 |
size 27153960
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 54405050
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:978ce86404e562d55461b7dad0dbe89210145bf9b867f49d5c8c8d6102f95720
|
3 |
size 54405050
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c9b56cb3dff46736275b60088386cb82f5e8834747cdf16e7f91e68301dd4b5
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95997b279ed7aa98f5d47d9af99637b6c419abab98a51c8e49868272f21211d6
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -801,6 +801,20 @@
|
|
801 |
"learning_rate": 1.7484231070955536e-07,
|
802 |
"loss": 2.4562,
|
803 |
"step": 110
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
804 |
}
|
805 |
],
|
806 |
"logging_steps": 1,
|
@@ -815,12 +829,12 @@
|
|
815 |
"should_evaluate": false,
|
816 |
"should_log": false,
|
817 |
"should_save": true,
|
818 |
-
"should_training_stop":
|
819 |
},
|
820 |
"attributes": {}
|
821 |
}
|
822 |
},
|
823 |
-
"total_flos": 1.
|
824 |
"train_batch_size": 2,
|
825 |
"trial_name": null,
|
826 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.0219224283305226,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 112,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
801 |
"learning_rate": 1.7484231070955536e-07,
|
802 |
"loss": 2.4562,
|
803 |
"step": 110
|
804 |
+
},
|
805 |
+
{
|
806 |
+
"epoch": 2.9949409780775715,
|
807 |
+
"grad_norm": 0.5681694149971008,
|
808 |
+
"learning_rate": 4.372332253694821e-08,
|
809 |
+
"loss": 2.2075,
|
810 |
+
"step": 111
|
811 |
+
},
|
812 |
+
{
|
813 |
+
"epoch": 3.0219224283305226,
|
814 |
+
"grad_norm": 1.986908197402954,
|
815 |
+
"learning_rate": 0.0,
|
816 |
+
"loss": 4.4294,
|
817 |
+
"step": 112
|
818 |
}
|
819 |
],
|
820 |
"logging_steps": 1,
|
|
|
829 |
"should_evaluate": false,
|
830 |
"should_log": false,
|
831 |
"should_save": true,
|
832 |
+
"should_training_stop": true
|
833 |
},
|
834 |
"attributes": {}
|
835 |
}
|
836 |
},
|
837 |
+
"total_flos": 1.2738571278483456e+16,
|
838 |
"train_batch_size": 2,
|
839 |
"trial_name": null,
|
840 |
"trial_params": null
|