Training in progress, step 1287, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 35237104
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e6fbb2bfd52561e812f1aaae0e7d2b6e891b87409d7b7b89600d1c8b2267b0c
|
3 |
size 35237104
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 18810356
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7254b162fe674b1b20ce0ba3b325760b2d39b436c59e27fa7b1d165a3d4f5466
|
3 |
size 18810356
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32223ff90a7bc618a7d1a841a4f8d951e3cfafa282efaa7185e0e1a176562652
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1748cfd64f75de15bebd10f07006b82775ac37a6afa9eedcdb3abb9f08211cb6
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 107.
|
5 |
"eval_steps": 3,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -12427,6 +12427,35 @@
|
|
12427 |
"eval_samples_per_second": 4.971,
|
12428 |
"eval_steps_per_second": 2.982,
|
12429 |
"step": 1284
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12430 |
}
|
12431 |
],
|
12432 |
"logging_steps": 1,
|
@@ -12446,7 +12475,7 @@
|
|
12446 |
"attributes": {}
|
12447 |
}
|
12448 |
},
|
12449 |
-
"total_flos": 9.
|
12450 |
"train_batch_size": 2,
|
12451 |
"trial_name": null,
|
12452 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 107.25,
|
5 |
"eval_steps": 3,
|
6 |
+
"global_step": 1287,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
12427 |
"eval_samples_per_second": 4.971,
|
12428 |
"eval_steps_per_second": 2.982,
|
12429 |
"step": 1284
|
12430 |
+
},
|
12431 |
+
{
|
12432 |
+
"epoch": 107.08333333333333,
|
12433 |
+
"grad_norm": 0.00019312536460347474,
|
12434 |
+
"learning_rate": 3.8142304491339276e-05,
|
12435 |
+
"loss": 0.0,
|
12436 |
+
"step": 1285
|
12437 |
+
},
|
12438 |
+
{
|
12439 |
+
"epoch": 107.16666666666667,
|
12440 |
+
"grad_norm": 0.00016591262829024345,
|
12441 |
+
"learning_rate": 3.800449896988535e-05,
|
12442 |
+
"loss": 0.0,
|
12443 |
+
"step": 1286
|
12444 |
+
},
|
12445 |
+
{
|
12446 |
+
"epoch": 107.25,
|
12447 |
+
"grad_norm": 0.0002106752071995288,
|
12448 |
+
"learning_rate": 3.786688441341778e-05,
|
12449 |
+
"loss": 0.0,
|
12450 |
+
"step": 1287
|
12451 |
+
},
|
12452 |
+
{
|
12453 |
+
"epoch": 107.25,
|
12454 |
+
"eval_loss": 0.5604909062385559,
|
12455 |
+
"eval_runtime": 1.004,
|
12456 |
+
"eval_samples_per_second": 4.98,
|
12457 |
+
"eval_steps_per_second": 2.988,
|
12458 |
+
"step": 1287
|
12459 |
}
|
12460 |
],
|
12461 |
"logging_steps": 1,
|
|
|
12475 |
"attributes": {}
|
12476 |
}
|
12477 |
},
|
12478 |
+
"total_flos": 9.182254213890048e+16,
|
12479 |
"train_batch_size": 2,
|
12480 |
"trial_name": null,
|
12481 |
"trial_params": null
|