Training in progress, step 210, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 191968
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2791b511c4630b21fd991533625ec1ec52da3e5cc1609da7a4c2cfedc1bcba6d
|
3 |
size 191968
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 253144
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a884f4320934e16da5143deade141b5396382f6dfdc0784d68105ea5d71bc6b2
|
3 |
size 253144
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fec363189963dc133232a1202530bba3901933ae6ee2483645557d8ee2922117
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16a7801db1aa9f181cf78d5699e3a7862ab42bf9c452e31cb54501196abe18a0
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 21,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -528,6 +528,63 @@
|
|
528 |
"eval_samples_per_second": 540.238,
|
529 |
"eval_steps_per_second": 69.459,
|
530 |
"step": 189
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
}
|
532 |
],
|
533 |
"logging_steps": 3,
|
@@ -547,7 +604,7 @@
|
|
547 |
"attributes": {}
|
548 |
}
|
549 |
},
|
550 |
-
"total_flos":
|
551 |
"train_batch_size": 8,
|
552 |
"trial_name": null,
|
553 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.5438066465256797,
|
5 |
"eval_steps": 21,
|
6 |
+
"global_step": 210,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
528 |
"eval_samples_per_second": 540.238,
|
529 |
"eval_steps_per_second": 69.459,
|
530 |
"step": 189
|
531 |
+
},
|
532 |
+
{
|
533 |
+
"epoch": 2.326283987915408,
|
534 |
+
"grad_norm": 0.17823714017868042,
|
535 |
+
"learning_rate": 1.3390009847968504e-05,
|
536 |
+
"loss": 10.1773,
|
537 |
+
"step": 192
|
538 |
+
},
|
539 |
+
{
|
540 |
+
"epoch": 2.3625377643504533,
|
541 |
+
"grad_norm": 0.21689902245998383,
|
542 |
+
"learning_rate": 1.2075907148663579e-05,
|
543 |
+
"loss": 10.1772,
|
544 |
+
"step": 195
|
545 |
+
},
|
546 |
+
{
|
547 |
+
"epoch": 2.3987915407854983,
|
548 |
+
"grad_norm": 0.3612368106842041,
|
549 |
+
"learning_rate": 1.0820770952526155e-05,
|
550 |
+
"loss": 10.1826,
|
551 |
+
"step": 198
|
552 |
+
},
|
553 |
+
{
|
554 |
+
"epoch": 2.4350453172205437,
|
555 |
+
"grad_norm": 0.19127142429351807,
|
556 |
+
"learning_rate": 9.62655281559679e-06,
|
557 |
+
"loss": 10.1821,
|
558 |
+
"step": 201
|
559 |
+
},
|
560 |
+
{
|
561 |
+
"epoch": 2.471299093655589,
|
562 |
+
"grad_norm": 0.21965357661247253,
|
563 |
+
"learning_rate": 8.49510957510633e-06,
|
564 |
+
"loss": 10.1765,
|
565 |
+
"step": 204
|
566 |
+
},
|
567 |
+
{
|
568 |
+
"epoch": 2.5075528700906347,
|
569 |
+
"grad_norm": 0.1769980639219284,
|
570 |
+
"learning_rate": 7.4282004623615396e-06,
|
571 |
+
"loss": 10.1756,
|
572 |
+
"step": 207
|
573 |
+
},
|
574 |
+
{
|
575 |
+
"epoch": 2.5438066465256797,
|
576 |
+
"grad_norm": 0.20193351805210114,
|
577 |
+
"learning_rate": 6.427484367393699e-06,
|
578 |
+
"loss": 10.178,
|
579 |
+
"step": 210
|
580 |
+
},
|
581 |
+
{
|
582 |
+
"epoch": 2.5438066465256797,
|
583 |
+
"eval_loss": 10.170087814331055,
|
584 |
+
"eval_runtime": 0.2628,
|
585 |
+
"eval_samples_per_second": 532.826,
|
586 |
+
"eval_steps_per_second": 68.506,
|
587 |
+
"step": 210
|
588 |
}
|
589 |
],
|
590 |
"logging_steps": 3,
|
|
|
604 |
"attributes": {}
|
605 |
}
|
606 |
},
|
607 |
+
"total_flos": 23425638727680.0,
|
608 |
"train_batch_size": 8,
|
609 |
"trial_name": null,
|
610 |
"trial_params": null
|