Training in progress, step 475, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 72673016
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1efd90182dfa6a81ee47d9232456689b3603b96f32f042109f9b660e92bd1d92
|
3 |
size 72673016
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 36892564
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:811fb76be471b2d061d804906e97d50c54cb2cf42f67f6d42e24bb576f4155dc
|
3 |
size 36892564
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c54609d4e8cb89282e95eaa414501a7844ec04587efed4c10cd692e700fa780
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7f40779dea4b6e00c1e20018f59c11c5cbe1ad90972d2ca12df667176352bf8
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2707,13 +2707,163 @@
|
|
2707 |
"learning_rate": 2.4193548387096777e-05,
|
2708 |
"loss": 1.7733,
|
2709 |
"step": 450
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2710 |
}
|
2711 |
],
|
2712 |
"logging_steps": 1,
|
2713 |
"max_steps": 501,
|
2714 |
"num_train_epochs": 1,
|
2715 |
"save_steps": 25,
|
2716 |
-
"total_flos": 2.
|
2717 |
"trial_name": null,
|
2718 |
"trial_params": null
|
2719 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.08980054825597883,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 475,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2707 |
"learning_rate": 2.4193548387096777e-05,
|
2708 |
"loss": 1.7733,
|
2709 |
"step": 450
|
2710 |
+
},
|
2711 |
+
{
|
2712 |
+
"epoch": 0.09,
|
2713 |
+
"learning_rate": 2.3790322580645163e-05,
|
2714 |
+
"loss": 1.6785,
|
2715 |
+
"step": 451
|
2716 |
+
},
|
2717 |
+
{
|
2718 |
+
"epoch": 0.09,
|
2719 |
+
"learning_rate": 2.338709677419355e-05,
|
2720 |
+
"loss": 1.6109,
|
2721 |
+
"step": 452
|
2722 |
+
},
|
2723 |
+
{
|
2724 |
+
"epoch": 0.09,
|
2725 |
+
"learning_rate": 2.2983870967741935e-05,
|
2726 |
+
"loss": 1.5554,
|
2727 |
+
"step": 453
|
2728 |
+
},
|
2729 |
+
{
|
2730 |
+
"epoch": 0.09,
|
2731 |
+
"learning_rate": 2.258064516129032e-05,
|
2732 |
+
"loss": 1.5901,
|
2733 |
+
"step": 454
|
2734 |
+
},
|
2735 |
+
{
|
2736 |
+
"epoch": 0.09,
|
2737 |
+
"learning_rate": 2.217741935483871e-05,
|
2738 |
+
"loss": 1.687,
|
2739 |
+
"step": 455
|
2740 |
+
},
|
2741 |
+
{
|
2742 |
+
"epoch": 0.09,
|
2743 |
+
"learning_rate": 2.1774193548387097e-05,
|
2744 |
+
"loss": 1.7672,
|
2745 |
+
"step": 456
|
2746 |
+
},
|
2747 |
+
{
|
2748 |
+
"epoch": 0.09,
|
2749 |
+
"learning_rate": 2.1370967741935487e-05,
|
2750 |
+
"loss": 1.9025,
|
2751 |
+
"step": 457
|
2752 |
+
},
|
2753 |
+
{
|
2754 |
+
"epoch": 0.09,
|
2755 |
+
"learning_rate": 2.0967741935483873e-05,
|
2756 |
+
"loss": 1.804,
|
2757 |
+
"step": 458
|
2758 |
+
},
|
2759 |
+
{
|
2760 |
+
"epoch": 0.09,
|
2761 |
+
"learning_rate": 2.056451612903226e-05,
|
2762 |
+
"loss": 1.725,
|
2763 |
+
"step": 459
|
2764 |
+
},
|
2765 |
+
{
|
2766 |
+
"epoch": 0.09,
|
2767 |
+
"learning_rate": 2.0161290322580645e-05,
|
2768 |
+
"loss": 1.6565,
|
2769 |
+
"step": 460
|
2770 |
+
},
|
2771 |
+
{
|
2772 |
+
"epoch": 0.09,
|
2773 |
+
"learning_rate": 1.975806451612903e-05,
|
2774 |
+
"loss": 1.8996,
|
2775 |
+
"step": 461
|
2776 |
+
},
|
2777 |
+
{
|
2778 |
+
"epoch": 0.09,
|
2779 |
+
"learning_rate": 1.935483870967742e-05,
|
2780 |
+
"loss": 1.7974,
|
2781 |
+
"step": 462
|
2782 |
+
},
|
2783 |
+
{
|
2784 |
+
"epoch": 0.09,
|
2785 |
+
"learning_rate": 1.8951612903225807e-05,
|
2786 |
+
"loss": 1.8168,
|
2787 |
+
"step": 463
|
2788 |
+
},
|
2789 |
+
{
|
2790 |
+
"epoch": 0.09,
|
2791 |
+
"learning_rate": 1.8548387096774193e-05,
|
2792 |
+
"loss": 1.7012,
|
2793 |
+
"step": 464
|
2794 |
+
},
|
2795 |
+
{
|
2796 |
+
"epoch": 0.09,
|
2797 |
+
"learning_rate": 1.8145161290322583e-05,
|
2798 |
+
"loss": 1.9804,
|
2799 |
+
"step": 465
|
2800 |
+
},
|
2801 |
+
{
|
2802 |
+
"epoch": 0.09,
|
2803 |
+
"learning_rate": 1.774193548387097e-05,
|
2804 |
+
"loss": 1.7656,
|
2805 |
+
"step": 466
|
2806 |
+
},
|
2807 |
+
{
|
2808 |
+
"epoch": 0.09,
|
2809 |
+
"learning_rate": 1.733870967741936e-05,
|
2810 |
+
"loss": 1.5495,
|
2811 |
+
"step": 467
|
2812 |
+
},
|
2813 |
+
{
|
2814 |
+
"epoch": 0.09,
|
2815 |
+
"learning_rate": 1.693548387096774e-05,
|
2816 |
+
"loss": 1.8389,
|
2817 |
+
"step": 468
|
2818 |
+
},
|
2819 |
+
{
|
2820 |
+
"epoch": 0.09,
|
2821 |
+
"learning_rate": 1.653225806451613e-05,
|
2822 |
+
"loss": 1.7781,
|
2823 |
+
"step": 469
|
2824 |
+
},
|
2825 |
+
{
|
2826 |
+
"epoch": 0.09,
|
2827 |
+
"learning_rate": 1.6129032258064517e-05,
|
2828 |
+
"loss": 1.6191,
|
2829 |
+
"step": 470
|
2830 |
+
},
|
2831 |
+
{
|
2832 |
+
"epoch": 0.09,
|
2833 |
+
"learning_rate": 1.5725806451612903e-05,
|
2834 |
+
"loss": 1.6805,
|
2835 |
+
"step": 471
|
2836 |
+
},
|
2837 |
+
{
|
2838 |
+
"epoch": 0.09,
|
2839 |
+
"learning_rate": 1.5322580645161292e-05,
|
2840 |
+
"loss": 1.9133,
|
2841 |
+
"step": 472
|
2842 |
+
},
|
2843 |
+
{
|
2844 |
+
"epoch": 0.09,
|
2845 |
+
"learning_rate": 1.4919354838709679e-05,
|
2846 |
+
"loss": 1.5507,
|
2847 |
+
"step": 473
|
2848 |
+
},
|
2849 |
+
{
|
2850 |
+
"epoch": 0.09,
|
2851 |
+
"learning_rate": 1.4516129032258066e-05,
|
2852 |
+
"loss": 2.0498,
|
2853 |
+
"step": 474
|
2854 |
+
},
|
2855 |
+
{
|
2856 |
+
"epoch": 0.09,
|
2857 |
+
"learning_rate": 1.4112903225806454e-05,
|
2858 |
+
"loss": 1.6849,
|
2859 |
+
"step": 475
|
2860 |
}
|
2861 |
],
|
2862 |
"logging_steps": 1,
|
2863 |
"max_steps": 501,
|
2864 |
"num_train_epochs": 1,
|
2865 |
"save_steps": 25,
|
2866 |
+
"total_flos": 2.279886077804544e+16,
|
2867 |
"trial_name": null,
|
2868 |
"trial_params": null
|
2869 |
}
|