Training in progress, step 21000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1443,6 +1443,10 @@ You can finetune this model on your own dataset.
|
|
| 1443 |
| 0.3658 | 20700 | 0.35 |
|
| 1444 |
| 0.3667 | 20750 | 0.3241 |
|
| 1445 |
| 0.3675 | 20800 | 0.37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1446 |
|
| 1447 |
</details>
|
| 1448 |
|
|
|
|
| 1443 |
| 0.3658 | 20700 | 0.35 |
|
| 1444 |
| 0.3667 | 20750 | 0.3241 |
|
| 1445 |
| 0.3675 | 20800 | 0.37 |
|
| 1446 |
+
| 0.3684 | 20850 | 0.2689 |
|
| 1447 |
+
| 0.3693 | 20900 | 0.4061 |
|
| 1448 |
+
| 0.3702 | 20950 | 0.3412 |
|
| 1449 |
+
| 0.3711 | 21000 | 0.3619 |
|
| 1450 |
|
| 1451 |
</details>
|
| 1452 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60a3acd9a3207e3da6ba11eed7eb69a0e47965296ed3705edb0291288f4ba6b4
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a2a1c6ca3b664330d252fc223b17bbf693bbca085612f3859c6f296a40edd15
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dde2d77fb5f8a6059634e011ccb92791a33daebbb2a593b29b9a9f7cf46a0c7a
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5bb6b386a610284b8662dfd5a6292ebf91e688fce8f9594b479216784ff976a
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bc1ed1de742b8ceb6a4b045495a5daf14bac6f6cc397bb6df6345b8871c1b3f
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2920,6 +2920,34 @@
|
|
| 2920 |
"learning_rate": 3.514264396929179e-05,
|
| 2921 |
"loss": 0.37,
|
| 2922 |
"step": 20800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2923 |
}
|
| 2924 |
],
|
| 2925 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.3710837412309378,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 21000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2920 |
"learning_rate": 3.514264396929179e-05,
|
| 2921 |
"loss": 0.37,
|
| 2922 |
"step": 20800
|
| 2923 |
+
},
|
| 2924 |
+
{
|
| 2925 |
+
"epoch": 0.3684331430792882,
|
| 2926 |
+
"grad_norm": 1.5556259155273438,
|
| 2927 |
+
"learning_rate": 3.509355795095325e-05,
|
| 2928 |
+
"loss": 0.2689,
|
| 2929 |
+
"step": 20850
|
| 2930 |
+
},
|
| 2931 |
+
{
|
| 2932 |
+
"epoch": 0.36931667579650473,
|
| 2933 |
+
"grad_norm": 1.6530933380126953,
|
| 2934 |
+
"learning_rate": 3.504447193261472e-05,
|
| 2935 |
+
"loss": 0.4061,
|
| 2936 |
+
"step": 20900
|
| 2937 |
+
},
|
| 2938 |
+
{
|
| 2939 |
+
"epoch": 0.37020020851372126,
|
| 2940 |
+
"grad_norm": 1.250317931175232,
|
| 2941 |
+
"learning_rate": 3.499538591427618e-05,
|
| 2942 |
+
"loss": 0.3412,
|
| 2943 |
+
"step": 20950
|
| 2944 |
+
},
|
| 2945 |
+
{
|
| 2946 |
+
"epoch": 0.3710837412309378,
|
| 2947 |
+
"grad_norm": 1.9599151611328125,
|
| 2948 |
+
"learning_rate": 3.494728161630441e-05,
|
| 2949 |
+
"loss": 0.3619,
|
| 2950 |
+
"step": 21000
|
| 2951 |
}
|
| 2952 |
],
|
| 2953 |
"logging_steps": 50,
|