Training in progress, step 3600, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1cadaf5fbfd4e6aee40554c146ca75dc1f95cbd1b5133ac28d790b5d9edc490b
 size 6832520
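adapter_model.safetensors, like the other binaries in this checkpoint, is tracked with Git LFS, so the diff above only touches a small pointer file (version, oid sha256, size). A minimal sketch of checking a downloaded blob against such a pointer; the verify_lfs_blob helper is illustrative and not part of this repository:

import hashlib
from pathlib import Path

def verify_lfs_blob(pointer_text: str, blob_path: str) -> bool:
    """Compare a local file against the oid/size recorded in a Git LFS pointer."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].split(":", 1)[1]   # drop the "sha256:" prefix
    expected_size = int(fields["size"])
    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False
    return hashlib.sha256(blob.read_bytes()).hexdigest() == expected_oid

# Pointer contents as committed above for adapter_model.safetensors.
pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:1cadaf5fbfd4e6aee40554c146ca75dc1f95cbd1b5133ac28d790b5d9edc490b\n"
    "size 6832520"
)
print(verify_lfs_blob(pointer, "last-checkpoint/adapter_model.safetensors"))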
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d77f59571dc19ec2ae97f48b762fc9c7104b450b1dd4f4f44c20086f61be99a4
 size 13739450
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:61f9e60c11be639e03bf81d78cbe30bf20382df1ba6584029adec3134d967f6d
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cea58db64ba456c38a9f8ab6236e784932b8260cdaa3af8426d157163570e03c
 size 1256
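optimizer.pt, scheduler.pt and rng_state.pth hold the optimizer, learning-rate-scheduler and RNG state that the transformers Trainer restores when training resumes from last-checkpoint. A minimal inspection sketch, assuming these are ordinary torch.save archives as the Trainer writes them (newer PyTorch versions may need weights_only=False to unpickle them):

import torch

# Load the resume state on CPU just to inspect it; the byte sizes match the LFS pointers above.
optimizer_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("last-checkpoint/rng_state.pth", map_location="cpu", weights_only=False)

print(optimizer_state.keys())   # typically "state" and "param_groups"
print(scheduler_state)          # typically scheduler hyper-parameters and step counters
print(type(rng_state))          # typically a dict of RNG snapshots (Python, NumPy, CPU/CUDA)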
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.9334535598754883,
   "best_model_checkpoint": "./output/checkpoint-1500",
-  "epoch": 4.
+  "epoch": 4.597701149425287,
   "eval_steps": 150,
-  "global_step":
+  "global_step": 3600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2606,6 +2606,119 @@
       "eval_samples_per_second": 20.4,
       "eval_steps_per_second": 20.4,
       "step": 3450
+    },
+    {
+      "epoch": 4.41890166028097,
+      "grad_norm": 2.3654282093048096,
+      "learning_rate": 3.0975050967969045e-06,
+      "loss": 1.7632,
+      "step": 3460
+    },
+    {
+      "epoch": 4.431673052362708,
+      "grad_norm": 2.546827793121338,
+      "learning_rate": 3.097407623305086e-06,
+      "loss": 1.7426,
+      "step": 3470
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 2.499905824661255,
+      "learning_rate": 3.0972451720296086e-06,
+      "loss": 1.7915,
+      "step": 3480
+    },
+    {
+      "epoch": 4.457215836526181,
+      "grad_norm": 2.484066963195801,
+      "learning_rate": 3.097017749786602e-06,
+      "loss": 1.7089,
+      "step": 3490
+    },
+    {
+      "epoch": 4.469987228607918,
+      "grad_norm": 2.5977776050567627,
+      "learning_rate": 3.096725366118249e-06,
+      "loss": 1.7957,
+      "step": 3500
+    },
+    {
+      "epoch": 4.482758620689655,
+      "grad_norm": 2.3807027339935303,
+      "learning_rate": 3.096368033292382e-06,
+      "loss": 1.7295,
+      "step": 3510
+    },
+    {
+      "epoch": 4.495530012771392,
+      "grad_norm": 2.4631288051605225,
+      "learning_rate": 3.095945766301971e-06,
+      "loss": 1.7719,
+      "step": 3520
+    },
+    {
+      "epoch": 4.508301404853129,
+      "grad_norm": 2.4563748836517334,
+      "learning_rate": 3.095458582864493e-06,
+      "loss": 1.7191,
+      "step": 3530
+    },
+    {
+      "epoch": 4.521072796934866,
+      "grad_norm": 2.3577940464019775,
+      "learning_rate": 3.09490650342119e-06,
+      "loss": 1.725,
+      "step": 3540
+    },
+    {
+      "epoch": 4.533844189016603,
+      "grad_norm": 2.4217264652252197,
+      "learning_rate": 3.0942895511362085e-06,
+      "loss": 1.7284,
+      "step": 3550
+    },
+    {
+      "epoch": 4.54661558109834,
+      "grad_norm": 2.335932493209839,
+      "learning_rate": 3.093607751895632e-06,
+      "loss": 1.7966,
+      "step": 3560
+    },
+    {
+      "epoch": 4.559386973180077,
+      "grad_norm": 2.4453847408294678,
+      "learning_rate": 3.0928611343063904e-06,
+      "loss": 1.787,
+      "step": 3570
+    },
+    {
+      "epoch": 4.572158365261814,
+      "grad_norm": 2.350062608718872,
+      "learning_rate": 3.092049729695062e-06,
+      "loss": 1.7184,
+      "step": 3580
+    },
+    {
+      "epoch": 4.58492975734355,
+      "grad_norm": 2.446969985961914,
+      "learning_rate": 3.091173572106561e-06,
+      "loss": 1.8097,
+      "step": 3590
+    },
+    {
+      "epoch": 4.597701149425287,
+      "grad_norm": 2.3991920948028564,
+      "learning_rate": 3.090232698302703e-06,
+      "loss": 1.7007,
+      "step": 3600
+    },
+    {
+      "epoch": 4.597701149425287,
+      "eval_loss": 1.9395427703857422,
+      "eval_runtime": 24.7084,
+      "eval_samples_per_second": 20.236,
+      "eval_steps_per_second": 20.236,
+      "step": 3600
     }
   ],
   "logging_steps": 10,
@@ -2625,7 +2738,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.
+  "total_flos": 4.988587672549786e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
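The new entries extend the checkpoint's log with training loss for steps 3460 through 3600 and an evaluation at step 3600 (eval_loss 1.9395, slightly above the best_metric 1.9335 kept from checkpoint-1500, so best_model_checkpoint is unchanged). A minimal sketch of reading these records back out of the updated file, assuming the standard log_history layout the Trainer writes:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])                  # 3600 4.597701149425287
print(state["best_metric"], state["best_model_checkpoint"])  # 1.9334535598754883 ./output/checkpoint-1500

# Training entries carry a "loss" key; evaluation entries carry "eval_loss".
train_log = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_log = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]
print(train_log[-1])  # (3600, 1.7007)
print(eval_log[-1])   # (3600, 1.9395427703857422)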