Training in progress, step 23800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1499,6 +1499,10 @@ You can finetune this model on your own dataset.
|
|
| 1499 |
| 0.4153 | 23500 | 0.3987 |
|
| 1500 |
| 0.4161 | 23550 | 0.3387 |
|
| 1501 |
| 0.4170 | 23600 | 0.2989 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1502 |
|
| 1503 |
</details>
|
| 1504 |
|
|
|
|
| 1499 |
| 0.4153 | 23500 | 0.3987 |
|
| 1500 |
| 0.4161 | 23550 | 0.3387 |
|
| 1501 |
| 0.4170 | 23600 | 0.2989 |
|
| 1502 |
+
| 0.4179 | 23650 | 0.2629 |
|
| 1503 |
+
| 0.4188 | 23700 | 0.2905 |
|
| 1504 |
+
| 0.4197 | 23750 | 0.3234 |
|
| 1505 |
+
| 0.4206 | 23800 | 0.3063 |
|
| 1506 |
|
| 1507 |
</details>
|
| 1508 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da25745d4f384214e5fd8311c12900458d1b5b86d594566fce244180fc108392
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b4992703dc226d67e2557f70e2f6a89c68bca3aed88eebebef64b02c8918852
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f2a71677d5d8b3309c52c3c93b3a28a28f9259cc007d0be9b71770e4f3e0700
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:508437f27d03b8b5ea47dcee3f87a11f21e258b0021c269cc27deb7dd1d6dca5
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6db4f21bcf6330e113ebe3fc1f667f5da112ad8caba02a1bdc8f265e3ca1aa1d
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3312,6 +3312,34 @@
|
|
| 3312 |
"learning_rate": 3.239579038306729e-05,
|
| 3313 |
"loss": 0.2989,
|
| 3314 |
"step": 23600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3315 |
}
|
| 3316 |
],
|
| 3317 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.42056157339506284,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 23800,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3312 |
"learning_rate": 3.239579038306729e-05,
|
| 3313 |
"loss": 0.2989,
|
| 3314 |
"step": 23600
|
| 3315 |
+
},
|
| 3316 |
+
{
|
| 3317 |
+
"epoch": 0.41791097524341325,
|
| 3318 |
+
"grad_norm": 1.4236600399017334,
|
| 3319 |
+
"learning_rate": 3.234670436472875e-05,
|
| 3320 |
+
"loss": 0.2629,
|
| 3321 |
+
"step": 23650
|
| 3322 |
+
},
|
| 3323 |
+
{
|
| 3324 |
+
"epoch": 0.4187945079606298,
|
| 3325 |
+
"grad_norm": 3.2101380825042725,
|
| 3326 |
+
"learning_rate": 3.229761834639022e-05,
|
| 3327 |
+
"loss": 0.2905,
|
| 3328 |
+
"step": 23700
|
| 3329 |
+
},
|
| 3330 |
+
{
|
| 3331 |
+
"epoch": 0.4196780406778463,
|
| 3332 |
+
"grad_norm": 1.3380919694900513,
|
| 3333 |
+
"learning_rate": 3.2248532328051675e-05,
|
| 3334 |
+
"loss": 0.3234,
|
| 3335 |
+
"step": 23750
|
| 3336 |
+
},
|
| 3337 |
+
{
|
| 3338 |
+
"epoch": 0.42056157339506284,
|
| 3339 |
+
"grad_norm": 1.5015414953231812,
|
| 3340 |
+
"learning_rate": 3.2199446309713146e-05,
|
| 3341 |
+
"loss": 0.3063,
|
| 3342 |
+
"step": 23800
|
| 3343 |
}
|
| 3344 |
],
|
| 3345 |
"logging_steps": 50,
|