Training in progress, step 426, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:429776d8ec0c81ce8d08e40e885e46127b8184a5904f1cf0a1e91a4b38cdc560
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675604
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc16f8d1d14f42458e9f82501393c087cd1eb38fd3f59c3b432671f63aa669a8
|
3 |
size 50675604
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:933f690365711cd7ef49473934a6fcf025d686d0b3c5cc973fe5e24070e2c3b3
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2814,6 +2814,181 @@
|
|
2814 |
"learning_rate": 1.2577065351418003e-05,
|
2815 |
"loss": 1.0427,
|
2816 |
"step": 401
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2817 |
}
|
2818 |
],
|
2819 |
"logging_steps": 1,
|
@@ -2833,7 +3008,7 @@
|
|
2833 |
"attributes": {}
|
2834 |
}
|
2835 |
},
|
2836 |
-
"total_flos": 4.
|
2837 |
"train_batch_size": 4,
|
2838 |
"trial_name": null,
|
2839 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.4674897119341564,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 426,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2814 |
"learning_rate": 1.2577065351418003e-05,
|
2815 |
"loss": 1.0427,
|
2816 |
"step": 401
|
2817 |
+
},
|
2818 |
+
{
|
2819 |
+
"epoch": 0.4411522633744856,
|
2820 |
+
"grad_norm": 0.12298054248094559,
|
2821 |
+
"learning_rate": 1.2552404438964243e-05,
|
2822 |
+
"loss": 1.1429,
|
2823 |
+
"step": 402
|
2824 |
+
},
|
2825 |
+
{
|
2826 |
+
"epoch": 0.4422496570644719,
|
2827 |
+
"grad_norm": 0.10508795082569122,
|
2828 |
+
"learning_rate": 1.2527743526510482e-05,
|
2829 |
+
"loss": 1.2339,
|
2830 |
+
"step": 403
|
2831 |
+
},
|
2832 |
+
{
|
2833 |
+
"epoch": 0.4433470507544582,
|
2834 |
+
"grad_norm": 0.14140458405017853,
|
2835 |
+
"learning_rate": 1.2503082614056721e-05,
|
2836 |
+
"loss": 1.0924,
|
2837 |
+
"step": 404
|
2838 |
+
},
|
2839 |
+
{
|
2840 |
+
"epoch": 0.4444444444444444,
|
2841 |
+
"grad_norm": 0.16051869094371796,
|
2842 |
+
"learning_rate": 1.247842170160296e-05,
|
2843 |
+
"loss": 1.055,
|
2844 |
+
"step": 405
|
2845 |
+
},
|
2846 |
+
{
|
2847 |
+
"epoch": 0.4455418381344307,
|
2848 |
+
"grad_norm": 0.12968482077121735,
|
2849 |
+
"learning_rate": 1.2453760789149199e-05,
|
2850 |
+
"loss": 1.1362,
|
2851 |
+
"step": 406
|
2852 |
+
},
|
2853 |
+
{
|
2854 |
+
"epoch": 0.446639231824417,
|
2855 |
+
"grad_norm": 0.12716621160507202,
|
2856 |
+
"learning_rate": 1.2429099876695438e-05,
|
2857 |
+
"loss": 1.0987,
|
2858 |
+
"step": 407
|
2859 |
+
},
|
2860 |
+
{
|
2861 |
+
"epoch": 0.4477366255144033,
|
2862 |
+
"grad_norm": 0.15174546837806702,
|
2863 |
+
"learning_rate": 1.2404438964241678e-05,
|
2864 |
+
"loss": 1.1899,
|
2865 |
+
"step": 408
|
2866 |
+
},
|
2867 |
+
{
|
2868 |
+
"epoch": 0.4488340192043896,
|
2869 |
+
"grad_norm": 0.1363244652748108,
|
2870 |
+
"learning_rate": 1.2379778051787916e-05,
|
2871 |
+
"loss": 1.1274,
|
2872 |
+
"step": 409
|
2873 |
+
},
|
2874 |
+
{
|
2875 |
+
"epoch": 0.4499314128943759,
|
2876 |
+
"grad_norm": 0.11740902811288834,
|
2877 |
+
"learning_rate": 1.2355117139334156e-05,
|
2878 |
+
"loss": 1.0766,
|
2879 |
+
"step": 410
|
2880 |
+
},
|
2881 |
+
{
|
2882 |
+
"epoch": 0.4510288065843621,
|
2883 |
+
"grad_norm": 0.1075834259390831,
|
2884 |
+
"learning_rate": 1.2330456226880397e-05,
|
2885 |
+
"loss": 1.1318,
|
2886 |
+
"step": 411
|
2887 |
+
},
|
2888 |
+
{
|
2889 |
+
"epoch": 0.4521262002743484,
|
2890 |
+
"grad_norm": 0.11810291558504105,
|
2891 |
+
"learning_rate": 1.2305795314426634e-05,
|
2892 |
+
"loss": 1.1657,
|
2893 |
+
"step": 412
|
2894 |
+
},
|
2895 |
+
{
|
2896 |
+
"epoch": 0.4532235939643347,
|
2897 |
+
"grad_norm": 0.1261415034532547,
|
2898 |
+
"learning_rate": 1.2281134401972875e-05,
|
2899 |
+
"loss": 1.2063,
|
2900 |
+
"step": 413
|
2901 |
+
},
|
2902 |
+
{
|
2903 |
+
"epoch": 0.454320987654321,
|
2904 |
+
"grad_norm": 0.14952872693538666,
|
2905 |
+
"learning_rate": 1.2256473489519114e-05,
|
2906 |
+
"loss": 1.0857,
|
2907 |
+
"step": 414
|
2908 |
+
},
|
2909 |
+
{
|
2910 |
+
"epoch": 0.4554183813443073,
|
2911 |
+
"grad_norm": 0.1363765001296997,
|
2912 |
+
"learning_rate": 1.2231812577065353e-05,
|
2913 |
+
"loss": 1.1038,
|
2914 |
+
"step": 415
|
2915 |
+
},
|
2916 |
+
{
|
2917 |
+
"epoch": 0.4565157750342936,
|
2918 |
+
"grad_norm": 0.1384081095457077,
|
2919 |
+
"learning_rate": 1.2207151664611591e-05,
|
2920 |
+
"loss": 1.1109,
|
2921 |
+
"step": 416
|
2922 |
+
},
|
2923 |
+
{
|
2924 |
+
"epoch": 0.4576131687242798,
|
2925 |
+
"grad_norm": 0.1204955130815506,
|
2926 |
+
"learning_rate": 1.2182490752157832e-05,
|
2927 |
+
"loss": 1.0567,
|
2928 |
+
"step": 417
|
2929 |
+
},
|
2930 |
+
{
|
2931 |
+
"epoch": 0.4587105624142661,
|
2932 |
+
"grad_norm": 0.14003603160381317,
|
2933 |
+
"learning_rate": 1.215782983970407e-05,
|
2934 |
+
"loss": 1.1006,
|
2935 |
+
"step": 418
|
2936 |
+
},
|
2937 |
+
{
|
2938 |
+
"epoch": 0.4598079561042524,
|
2939 |
+
"grad_norm": 0.18735840916633606,
|
2940 |
+
"learning_rate": 1.213316892725031e-05,
|
2941 |
+
"loss": 1.1884,
|
2942 |
+
"step": 419
|
2943 |
+
},
|
2944 |
+
{
|
2945 |
+
"epoch": 0.4609053497942387,
|
2946 |
+
"grad_norm": 0.16455943882465363,
|
2947 |
+
"learning_rate": 1.2108508014796549e-05,
|
2948 |
+
"loss": 1.0925,
|
2949 |
+
"step": 420
|
2950 |
+
},
|
2951 |
+
{
|
2952 |
+
"epoch": 0.462002743484225,
|
2953 |
+
"grad_norm": 0.14214913547039032,
|
2954 |
+
"learning_rate": 1.2083847102342788e-05,
|
2955 |
+
"loss": 1.0512,
|
2956 |
+
"step": 421
|
2957 |
+
},
|
2958 |
+
{
|
2959 |
+
"epoch": 0.4631001371742112,
|
2960 |
+
"grad_norm": 0.12036455422639847,
|
2961 |
+
"learning_rate": 1.2059186189889027e-05,
|
2962 |
+
"loss": 1.2031,
|
2963 |
+
"step": 422
|
2964 |
+
},
|
2965 |
+
{
|
2966 |
+
"epoch": 0.4641975308641975,
|
2967 |
+
"grad_norm": 0.13500386476516724,
|
2968 |
+
"learning_rate": 1.2034525277435265e-05,
|
2969 |
+
"loss": 1.1086,
|
2970 |
+
"step": 423
|
2971 |
+
},
|
2972 |
+
{
|
2973 |
+
"epoch": 0.4652949245541838,
|
2974 |
+
"grad_norm": 0.14389222860336304,
|
2975 |
+
"learning_rate": 1.2009864364981504e-05,
|
2976 |
+
"loss": 1.1124,
|
2977 |
+
"step": 424
|
2978 |
+
},
|
2979 |
+
{
|
2980 |
+
"epoch": 0.4663923182441701,
|
2981 |
+
"grad_norm": 0.14557717740535736,
|
2982 |
+
"learning_rate": 1.1985203452527745e-05,
|
2983 |
+
"loss": 1.1043,
|
2984 |
+
"step": 425
|
2985 |
+
},
|
2986 |
+
{
|
2987 |
+
"epoch": 0.4674897119341564,
|
2988 |
+
"grad_norm": 0.16723041236400604,
|
2989 |
+
"learning_rate": 1.1960542540073982e-05,
|
2990 |
+
"loss": 1.1054,
|
2991 |
+
"step": 426
|
2992 |
}
|
2993 |
],
|
2994 |
"logging_steps": 1,
|
|
|
3008 |
"attributes": {}
|
3009 |
}
|
3010 |
},
|
3011 |
+
"total_flos": 4.423126307946209e+17,
|
3012 |
"train_batch_size": 4,
|
3013 |
"trial_name": null,
|
3014 |
"trial_params": null
|