{ "best_metric": 0.8856489658355713, "best_model_checkpoint": "autotrain-1hkeo-o33ms/checkpoint-339", "epoch": 3.0, "eval_steps": 500, "global_step": 339, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017699115044247787, "grad_norm": null, "learning_rate": 0.0, "loss": 2.8516, "step": 2 }, { "epoch": 0.035398230088495575, "grad_norm": 106.12066650390625, "learning_rate": 1.4705882352941177e-06, "loss": 2.7397, "step": 4 }, { "epoch": 0.05309734513274336, "grad_norm": 239.92628479003906, "learning_rate": 2.9411764705882355e-06, "loss": 2.5233, "step": 6 }, { "epoch": 0.07079646017699115, "grad_norm": 219.21218872070312, "learning_rate": 5.882352941176471e-06, "loss": 2.7366, "step": 8 }, { "epoch": 0.08849557522123894, "grad_norm": 214.71096801757812, "learning_rate": 8.823529411764707e-06, "loss": 2.6004, "step": 10 }, { "epoch": 0.10619469026548672, "grad_norm": null, "learning_rate": 8.823529411764707e-06, "loss": 2.4951, "step": 12 }, { "epoch": 0.12389380530973451, "grad_norm": 320.1042785644531, "learning_rate": 1.1764705882352942e-05, "loss": 1.8943, "step": 14 }, { "epoch": 0.1415929203539823, "grad_norm": 1234.984619140625, "learning_rate": 1.4705882352941177e-05, "loss": 2.1298, "step": 16 }, { "epoch": 0.1592920353982301, "grad_norm": 85.23419189453125, "learning_rate": 1.7647058823529414e-05, "loss": 1.978, "step": 18 }, { "epoch": 0.17699115044247787, "grad_norm": 148.04153442382812, "learning_rate": 2.058823529411765e-05, "loss": 1.9334, "step": 20 }, { "epoch": 0.19469026548672566, "grad_norm": 70.3712387084961, "learning_rate": 2.3529411764705884e-05, "loss": 1.9477, "step": 22 }, { "epoch": 0.21238938053097345, "grad_norm": 279.7557678222656, "learning_rate": 2.647058823529412e-05, "loss": 1.8483, "step": 24 }, { "epoch": 0.23008849557522124, "grad_norm": 294.1581115722656, "learning_rate": 2.9411764705882354e-05, "loss": 1.5582, "step": 26 }, { "epoch": 
0.24778761061946902, "grad_norm": 110.91650390625, "learning_rate": 3.235294117647059e-05, "loss": 1.5439, "step": 28 }, { "epoch": 0.26548672566371684, "grad_norm": 68.54044342041016, "learning_rate": 3.529411764705883e-05, "loss": 1.5531, "step": 30 }, { "epoch": 0.2831858407079646, "grad_norm": 67.14613342285156, "learning_rate": 3.8235294117647055e-05, "loss": 1.5641, "step": 32 }, { "epoch": 0.3008849557522124, "grad_norm": 73.9820785522461, "learning_rate": 4.11764705882353e-05, "loss": 1.687, "step": 34 }, { "epoch": 0.3185840707964602, "grad_norm": 38.29616928100586, "learning_rate": 4.411764705882353e-05, "loss": 1.4187, "step": 36 }, { "epoch": 0.336283185840708, "grad_norm": 84.34982299804688, "learning_rate": 4.705882352941177e-05, "loss": 1.3998, "step": 38 }, { "epoch": 0.35398230088495575, "grad_norm": 188.01791381835938, "learning_rate": 5e-05, "loss": 1.477, "step": 40 }, { "epoch": 0.37168141592920356, "grad_norm": 443.12078857421875, "learning_rate": 4.967213114754098e-05, "loss": 1.4343, "step": 42 }, { "epoch": 0.3893805309734513, "grad_norm": 70.84330749511719, "learning_rate": 4.934426229508197e-05, "loss": 1.4312, "step": 44 }, { "epoch": 0.40707964601769914, "grad_norm": 151.94332885742188, "learning_rate": 4.9016393442622957e-05, "loss": 1.5684, "step": 46 }, { "epoch": 0.4247787610619469, "grad_norm": 44.81299591064453, "learning_rate": 4.868852459016394e-05, "loss": 1.6841, "step": 48 }, { "epoch": 0.4424778761061947, "grad_norm": 212.30218505859375, "learning_rate": 4.836065573770492e-05, "loss": 1.3116, "step": 50 }, { "epoch": 0.46017699115044247, "grad_norm": 146.485595703125, "learning_rate": 4.8032786885245904e-05, "loss": 1.5651, "step": 52 }, { "epoch": 0.4778761061946903, "grad_norm": 56.38231658935547, "learning_rate": 4.770491803278689e-05, "loss": 1.611, "step": 54 }, { "epoch": 0.49557522123893805, "grad_norm": 168.2674102783203, "learning_rate": 4.737704918032787e-05, "loss": 1.5692, "step": 56 }, { "epoch": 
0.5132743362831859, "grad_norm": 62.2509765625, "learning_rate": 4.704918032786885e-05, "loss": 1.8457, "step": 58 }, { "epoch": 0.5309734513274337, "grad_norm": 272.4936828613281, "learning_rate": 4.672131147540984e-05, "loss": 1.5168, "step": 60 }, { "epoch": 0.5486725663716814, "grad_norm": 45.84150695800781, "learning_rate": 4.6393442622950825e-05, "loss": 1.3566, "step": 62 }, { "epoch": 0.5663716814159292, "grad_norm": 78.61536407470703, "learning_rate": 4.6065573770491805e-05, "loss": 1.2089, "step": 64 }, { "epoch": 0.584070796460177, "grad_norm": 65.8510513305664, "learning_rate": 4.5737704918032786e-05, "loss": 1.2913, "step": 66 }, { "epoch": 0.6017699115044248, "grad_norm": 90.05781555175781, "learning_rate": 4.540983606557377e-05, "loss": 1.193, "step": 68 }, { "epoch": 0.6194690265486725, "grad_norm": 87.59015655517578, "learning_rate": 4.508196721311476e-05, "loss": 1.3291, "step": 70 }, { "epoch": 0.6371681415929203, "grad_norm": 52.730377197265625, "learning_rate": 4.475409836065574e-05, "loss": 1.2274, "step": 72 }, { "epoch": 0.6548672566371682, "grad_norm": 39.25498962402344, "learning_rate": 4.442622950819673e-05, "loss": 1.2745, "step": 74 }, { "epoch": 0.672566371681416, "grad_norm": 31.809101104736328, "learning_rate": 4.409836065573771e-05, "loss": 1.1752, "step": 76 }, { "epoch": 0.6902654867256637, "grad_norm": 48.21644592285156, "learning_rate": 4.377049180327869e-05, "loss": 1.313, "step": 78 }, { "epoch": 0.7079646017699115, "grad_norm": 37.171226501464844, "learning_rate": 4.3442622950819674e-05, "loss": 1.2738, "step": 80 }, { "epoch": 0.7256637168141593, "grad_norm": 129.79141235351562, "learning_rate": 4.311475409836066e-05, "loss": 1.1686, "step": 82 }, { "epoch": 0.7433628318584071, "grad_norm": 77.16220092773438, "learning_rate": 4.278688524590164e-05, "loss": 1.3718, "step": 84 }, { "epoch": 0.7610619469026548, "grad_norm": 45.43793487548828, "learning_rate": 4.245901639344262e-05, "loss": 1.1993, "step": 86 }, { "epoch": 
0.7787610619469026, "grad_norm": 130.69161987304688, "learning_rate": 4.213114754098361e-05, "loss": 1.052, "step": 88 }, { "epoch": 0.7964601769911505, "grad_norm": 42.368804931640625, "learning_rate": 4.1803278688524595e-05, "loss": 1.0408, "step": 90 }, { "epoch": 0.8141592920353983, "grad_norm": 42.31563949584961, "learning_rate": 4.1475409836065575e-05, "loss": 1.3483, "step": 92 }, { "epoch": 0.831858407079646, "grad_norm": 47.8664665222168, "learning_rate": 4.1147540983606556e-05, "loss": 1.2632, "step": 94 }, { "epoch": 0.8495575221238938, "grad_norm": 64.5959243774414, "learning_rate": 4.081967213114754e-05, "loss": 1.1907, "step": 96 }, { "epoch": 0.8672566371681416, "grad_norm": 260.2913513183594, "learning_rate": 4.049180327868853e-05, "loss": 1.1238, "step": 98 }, { "epoch": 0.8849557522123894, "grad_norm": 42.65099334716797, "learning_rate": 4.016393442622951e-05, "loss": 1.3226, "step": 100 }, { "epoch": 0.9026548672566371, "grad_norm": 42.4586181640625, "learning_rate": 3.983606557377049e-05, "loss": 1.2652, "step": 102 }, { "epoch": 0.9203539823008849, "grad_norm": 50.917659759521484, "learning_rate": 3.950819672131148e-05, "loss": 1.1128, "step": 104 }, { "epoch": 0.9380530973451328, "grad_norm": 90.57892608642578, "learning_rate": 3.9180327868852464e-05, "loss": 1.5618, "step": 106 }, { "epoch": 0.9557522123893806, "grad_norm": 126.1560287475586, "learning_rate": 3.8852459016393444e-05, "loss": 1.5922, "step": 108 }, { "epoch": 0.9734513274336283, "grad_norm": 45.01774978637695, "learning_rate": 3.8524590163934424e-05, "loss": 1.4205, "step": 110 }, { "epoch": 0.9911504424778761, "grad_norm": 49.559791564941406, "learning_rate": 3.819672131147541e-05, "loss": 1.2759, "step": 112 }, { "epoch": 1.0, "eval_loss": 1.0817503929138184, "eval_map": 0.0905, "eval_map_50": 0.1567, "eval_map_75": 0.0974, "eval_map_large": 0.1083, "eval_map_medium": 0.0595, "eval_map_per_class": 0.0905, "eval_map_small": 0.0, "eval_mar_1": 0.2077, "eval_mar_10": 0.3979, 
"eval_mar_100": 0.6491, "eval_mar_100_per_class": 0.6491, "eval_mar_large": 0.7243, "eval_mar_medium": 0.4609, "eval_mar_small": 0.0, "eval_runtime": 6.9741, "eval_samples_per_second": 14.339, "eval_steps_per_second": 1.004, "step": 113 }, { "epoch": 1.008849557522124, "grad_norm": 55.82676315307617, "learning_rate": 3.78688524590164e-05, "loss": 1.3204, "step": 114 }, { "epoch": 1.0265486725663717, "grad_norm": 56.670345306396484, "learning_rate": 3.754098360655738e-05, "loss": 1.0837, "step": 116 }, { "epoch": 1.0442477876106195, "grad_norm": 166.82949829101562, "learning_rate": 3.721311475409836e-05, "loss": 1.1722, "step": 118 }, { "epoch": 1.0619469026548674, "grad_norm": 83.52169036865234, "learning_rate": 3.6885245901639346e-05, "loss": 1.2194, "step": 120 }, { "epoch": 1.079646017699115, "grad_norm": 56.17623519897461, "learning_rate": 3.655737704918033e-05, "loss": 1.1828, "step": 122 }, { "epoch": 1.0973451327433628, "grad_norm": 24.509546279907227, "learning_rate": 3.622950819672131e-05, "loss": 1.009, "step": 124 }, { "epoch": 1.1150442477876106, "grad_norm": 80.25348663330078, "learning_rate": 3.590163934426229e-05, "loss": 1.1667, "step": 126 }, { "epoch": 1.1327433628318584, "grad_norm": 98.71533966064453, "learning_rate": 3.557377049180328e-05, "loss": 1.1607, "step": 128 }, { "epoch": 1.1504424778761062, "grad_norm": 42.726444244384766, "learning_rate": 3.524590163934427e-05, "loss": 1.0344, "step": 130 }, { "epoch": 1.168141592920354, "grad_norm": 47.48421859741211, "learning_rate": 3.491803278688525e-05, "loss": 1.1513, "step": 132 }, { "epoch": 1.1858407079646018, "grad_norm": 38.87162780761719, "learning_rate": 3.459016393442623e-05, "loss": 1.0672, "step": 134 }, { "epoch": 1.2035398230088497, "grad_norm": 30.703020095825195, "learning_rate": 3.4262295081967214e-05, "loss": 1.2524, "step": 136 }, { "epoch": 1.2212389380530975, "grad_norm": 62.74378967285156, "learning_rate": 3.39344262295082e-05, "loss": 1.0351, "step": 138 }, { "epoch": 
1.238938053097345, "grad_norm": 32.68797302246094, "learning_rate": 3.360655737704918e-05, "loss": 1.0135, "step": 140 }, { "epoch": 1.2566371681415929, "grad_norm": 260.8392639160156, "learning_rate": 3.327868852459017e-05, "loss": 1.1592, "step": 142 }, { "epoch": 1.2743362831858407, "grad_norm": 102.03837585449219, "learning_rate": 3.295081967213115e-05, "loss": 1.0648, "step": 144 }, { "epoch": 1.2920353982300885, "grad_norm": 50.19871520996094, "learning_rate": 3.2622950819672136e-05, "loss": 1.0932, "step": 146 }, { "epoch": 1.3097345132743363, "grad_norm": 108.73077392578125, "learning_rate": 3.2295081967213116e-05, "loss": 1.0388, "step": 148 }, { "epoch": 1.3274336283185841, "grad_norm": 166.00540161132812, "learning_rate": 3.19672131147541e-05, "loss": 1.189, "step": 150 }, { "epoch": 1.3451327433628317, "grad_norm": 29.591773986816406, "learning_rate": 3.163934426229508e-05, "loss": 0.9697, "step": 152 }, { "epoch": 1.3628318584070795, "grad_norm": 96.86643981933594, "learning_rate": 3.131147540983606e-05, "loss": 1.1525, "step": 154 }, { "epoch": 1.3805309734513274, "grad_norm": 52.641666412353516, "learning_rate": 3.098360655737705e-05, "loss": 1.1335, "step": 156 }, { "epoch": 1.3982300884955752, "grad_norm": 34.866493225097656, "learning_rate": 3.065573770491804e-05, "loss": 1.285, "step": 158 }, { "epoch": 1.415929203539823, "grad_norm": 23.98964500427246, "learning_rate": 3.0327868852459017e-05, "loss": 1.2042, "step": 160 }, { "epoch": 1.4336283185840708, "grad_norm": 145.60592651367188, "learning_rate": 3e-05, "loss": 1.29, "step": 162 }, { "epoch": 1.4513274336283186, "grad_norm": 53.379520416259766, "learning_rate": 2.967213114754098e-05, "loss": 1.1392, "step": 164 }, { "epoch": 1.4690265486725664, "grad_norm": 60.14817428588867, "learning_rate": 2.934426229508197e-05, "loss": 1.2606, "step": 166 }, { "epoch": 1.4867256637168142, "grad_norm": 31.738466262817383, "learning_rate": 2.901639344262295e-05, "loss": 1.1376, "step": 168 }, { "epoch": 
1.504424778761062, "grad_norm": 38.88958740234375, "learning_rate": 2.8688524590163935e-05, "loss": 1.1329, "step": 170 }, { "epoch": 1.5221238938053099, "grad_norm": 61.10131072998047, "learning_rate": 2.8360655737704922e-05, "loss": 1.0537, "step": 172 }, { "epoch": 1.5398230088495575, "grad_norm": 43.02183532714844, "learning_rate": 2.8032786885245906e-05, "loss": 1.1499, "step": 174 }, { "epoch": 1.5575221238938053, "grad_norm": 37.601444244384766, "learning_rate": 2.7704918032786886e-05, "loss": 1.3708, "step": 176 }, { "epoch": 1.575221238938053, "grad_norm": 29.7570743560791, "learning_rate": 2.737704918032787e-05, "loss": 1.138, "step": 178 }, { "epoch": 1.592920353982301, "grad_norm": 94.0286865234375, "learning_rate": 2.7049180327868856e-05, "loss": 1.0933, "step": 180 }, { "epoch": 1.6106194690265485, "grad_norm": 38.72492599487305, "learning_rate": 2.6721311475409837e-05, "loss": 0.9599, "step": 182 }, { "epoch": 1.6283185840707963, "grad_norm": 30.70404624938965, "learning_rate": 2.639344262295082e-05, "loss": 1.0226, "step": 184 }, { "epoch": 1.6460176991150441, "grad_norm": 57.246028900146484, "learning_rate": 2.6065573770491804e-05, "loss": 0.9137, "step": 186 }, { "epoch": 1.663716814159292, "grad_norm": 30.864009857177734, "learning_rate": 2.573770491803279e-05, "loss": 1.0321, "step": 188 }, { "epoch": 1.6814159292035398, "grad_norm": 21.78902816772461, "learning_rate": 2.540983606557377e-05, "loss": 1.1898, "step": 190 }, { "epoch": 1.6991150442477876, "grad_norm": 50.869197845458984, "learning_rate": 2.5081967213114754e-05, "loss": 1.2291, "step": 192 }, { "epoch": 1.7168141592920354, "grad_norm": 81.57111358642578, "learning_rate": 2.4754098360655738e-05, "loss": 1.1021, "step": 194 }, { "epoch": 1.7345132743362832, "grad_norm": 28.92418098449707, "learning_rate": 2.442622950819672e-05, "loss": 0.9841, "step": 196 }, { "epoch": 1.752212389380531, "grad_norm": 23.563941955566406, "learning_rate": 2.4098360655737705e-05, "loss": 1.4778, "step": 
198 }, { "epoch": 1.7699115044247788, "grad_norm": 39.69824981689453, "learning_rate": 2.377049180327869e-05, "loss": 1.0115, "step": 200 }, { "epoch": 1.7876106194690267, "grad_norm": 503.5774841308594, "learning_rate": 2.3442622950819672e-05, "loss": 1.3526, "step": 202 }, { "epoch": 1.8053097345132745, "grad_norm": 47.411170959472656, "learning_rate": 2.311475409836066e-05, "loss": 1.0035, "step": 204 }, { "epoch": 1.823008849557522, "grad_norm": 60.6224479675293, "learning_rate": 2.278688524590164e-05, "loss": 1.2126, "step": 206 }, { "epoch": 1.8407079646017699, "grad_norm": 51.3713264465332, "learning_rate": 2.2459016393442626e-05, "loss": 1.2112, "step": 208 }, { "epoch": 1.8584070796460177, "grad_norm": 43.3449821472168, "learning_rate": 2.2131147540983607e-05, "loss": 1.1825, "step": 210 }, { "epoch": 1.8761061946902655, "grad_norm": 51.22021484375, "learning_rate": 2.1803278688524594e-05, "loss": 1.0688, "step": 212 }, { "epoch": 1.893805309734513, "grad_norm": 47.7821044921875, "learning_rate": 2.1475409836065574e-05, "loss": 1.1192, "step": 214 }, { "epoch": 1.911504424778761, "grad_norm": 26.08221435546875, "learning_rate": 2.114754098360656e-05, "loss": 0.9688, "step": 216 }, { "epoch": 1.9292035398230087, "grad_norm": 35.32424545288086, "learning_rate": 2.081967213114754e-05, "loss": 0.96, "step": 218 }, { "epoch": 1.9469026548672566, "grad_norm": 33.47419357299805, "learning_rate": 2.0491803278688525e-05, "loss": 1.237, "step": 220 }, { "epoch": 1.9646017699115044, "grad_norm": 54.51090621948242, "learning_rate": 2.0163934426229508e-05, "loss": 1.0218, "step": 222 }, { "epoch": 1.9823008849557522, "grad_norm": 46.36600875854492, "learning_rate": 1.9836065573770492e-05, "loss": 1.0245, "step": 224 }, { "epoch": 2.0, "grad_norm": 39.30439758300781, "learning_rate": 1.9508196721311475e-05, "loss": 1.0934, "step": 226 }, { "epoch": 2.0, "eval_loss": 1.2027697563171387, "eval_map": 0.186, "eval_map_50": 0.3347, "eval_map_75": 0.2021, "eval_map_large": 
0.2317, "eval_map_medium": 0.1023, "eval_map_per_class": 0.186, "eval_map_small": 0.0, "eval_mar_1": 0.2299, "eval_mar_10": 0.4979, "eval_mar_100": 0.5718, "eval_mar_100_per_class": 0.5718, "eval_mar_large": 0.6669, "eval_mar_medium": 0.3297, "eval_mar_small": 0.0, "eval_runtime": 6.8411, "eval_samples_per_second": 14.617, "eval_steps_per_second": 1.023, "step": 226 }, { "epoch": 2.017699115044248, "grad_norm": 64.81815338134766, "learning_rate": 1.918032786885246e-05, "loss": 0.9753, "step": 228 }, { "epoch": 2.0353982300884956, "grad_norm": 33.4212532043457, "learning_rate": 1.8852459016393442e-05, "loss": 1.0836, "step": 230 }, { "epoch": 2.0530973451327434, "grad_norm": 43.58014678955078, "learning_rate": 1.8524590163934426e-05, "loss": 0.9376, "step": 232 }, { "epoch": 2.0707964601769913, "grad_norm": 49.71245574951172, "learning_rate": 1.8196721311475413e-05, "loss": 1.088, "step": 234 }, { "epoch": 2.088495575221239, "grad_norm": 37.22849655151367, "learning_rate": 1.7868852459016393e-05, "loss": 1.1392, "step": 236 }, { "epoch": 2.106194690265487, "grad_norm": 37.5290412902832, "learning_rate": 1.754098360655738e-05, "loss": 1.0028, "step": 238 }, { "epoch": 2.1238938053097347, "grad_norm": 39.413692474365234, "learning_rate": 1.721311475409836e-05, "loss": 1.0709, "step": 240 }, { "epoch": 2.1415929203539825, "grad_norm": 58.448463439941406, "learning_rate": 1.6885245901639347e-05, "loss": 1.1094, "step": 242 }, { "epoch": 2.15929203539823, "grad_norm": 49.975345611572266, "learning_rate": 1.6557377049180328e-05, "loss": 1.0425, "step": 244 }, { "epoch": 2.1769911504424777, "grad_norm": 40.28168487548828, "learning_rate": 1.6229508196721314e-05, "loss": 1.1666, "step": 246 }, { "epoch": 2.1946902654867255, "grad_norm": 123.18711853027344, "learning_rate": 1.5901639344262295e-05, "loss": 0.9255, "step": 248 }, { "epoch": 2.2123893805309733, "grad_norm": 35.56529998779297, "learning_rate": 1.557377049180328e-05, "loss": 0.9732, "step": 250 }, { "epoch": 
2.230088495575221, "grad_norm": 48.11013412475586, "learning_rate": 1.5245901639344262e-05, "loss": 0.9164, "step": 252 }, { "epoch": 2.247787610619469, "grad_norm": 33.767940521240234, "learning_rate": 1.4918032786885247e-05, "loss": 0.9673, "step": 254 }, { "epoch": 2.265486725663717, "grad_norm": 105.49449157714844, "learning_rate": 1.4590163934426229e-05, "loss": 0.9585, "step": 256 }, { "epoch": 2.2831858407079646, "grad_norm": 77.859619140625, "learning_rate": 1.4262295081967214e-05, "loss": 0.9652, "step": 258 }, { "epoch": 2.3008849557522124, "grad_norm": 30.470129013061523, "learning_rate": 1.3934426229508196e-05, "loss": 0.9409, "step": 260 }, { "epoch": 2.3185840707964602, "grad_norm": 25.803863525390625, "learning_rate": 1.3606557377049181e-05, "loss": 0.8861, "step": 262 }, { "epoch": 2.336283185840708, "grad_norm": 19.344310760498047, "learning_rate": 1.3278688524590163e-05, "loss": 1.0303, "step": 264 }, { "epoch": 2.353982300884956, "grad_norm": 30.006305694580078, "learning_rate": 1.2950819672131149e-05, "loss": 1.0346, "step": 266 }, { "epoch": 2.3716814159292037, "grad_norm": 53.978092193603516, "learning_rate": 1.2622950819672132e-05, "loss": 0.9385, "step": 268 }, { "epoch": 2.3893805309734515, "grad_norm": 40.302066802978516, "learning_rate": 1.2295081967213116e-05, "loss": 0.9354, "step": 270 }, { "epoch": 2.4070796460176993, "grad_norm": 41.98043441772461, "learning_rate": 1.19672131147541e-05, "loss": 0.9648, "step": 272 }, { "epoch": 2.4247787610619467, "grad_norm": 18.923696517944336, "learning_rate": 1.1639344262295083e-05, "loss": 0.8274, "step": 274 }, { "epoch": 2.442477876106195, "grad_norm": 115.89277648925781, "learning_rate": 1.1311475409836065e-05, "loss": 0.905, "step": 276 }, { "epoch": 2.4601769911504423, "grad_norm": 45.129112243652344, "learning_rate": 1.0983606557377048e-05, "loss": 1.116, "step": 278 }, { "epoch": 2.47787610619469, "grad_norm": 40.201717376708984, "learning_rate": 1.0655737704918032e-05, "loss": 0.964, 
"step": 280 }, { "epoch": 2.495575221238938, "grad_norm": 53.22237777709961, "learning_rate": 1.0327868852459017e-05, "loss": 0.9607, "step": 282 }, { "epoch": 2.5132743362831858, "grad_norm": 49.16684341430664, "learning_rate": 1e-05, "loss": 1.0343, "step": 284 }, { "epoch": 2.5309734513274336, "grad_norm": 99.20750427246094, "learning_rate": 9.672131147540984e-06, "loss": 1.122, "step": 286 }, { "epoch": 2.5486725663716814, "grad_norm": 24.961946487426758, "learning_rate": 9.344262295081968e-06, "loss": 0.9803, "step": 288 }, { "epoch": 2.566371681415929, "grad_norm": 37.47533416748047, "learning_rate": 9.016393442622952e-06, "loss": 1.0125, "step": 290 }, { "epoch": 2.584070796460177, "grad_norm": 25.763242721557617, "learning_rate": 8.688524590163935e-06, "loss": 0.9614, "step": 292 }, { "epoch": 2.601769911504425, "grad_norm": 25.674652099609375, "learning_rate": 8.360655737704919e-06, "loss": 0.8985, "step": 294 }, { "epoch": 2.6194690265486726, "grad_norm": 26.32975196838379, "learning_rate": 8.032786885245902e-06, "loss": 0.9328, "step": 296 }, { "epoch": 2.6371681415929205, "grad_norm": 37.20893096923828, "learning_rate": 7.704918032786886e-06, "loss": 0.8681, "step": 298 }, { "epoch": 2.6548672566371683, "grad_norm": 21.895004272460938, "learning_rate": 7.3770491803278695e-06, "loss": 1.0877, "step": 300 }, { "epoch": 2.672566371681416, "grad_norm": 433.6007080078125, "learning_rate": 7.049180327868852e-06, "loss": 1.02, "step": 302 }, { "epoch": 2.6902654867256635, "grad_norm": 51.63364028930664, "learning_rate": 6.721311475409836e-06, "loss": 0.9108, "step": 304 }, { "epoch": 2.7079646017699117, "grad_norm": 31.832887649536133, "learning_rate": 6.393442622950819e-06, "loss": 1.2122, "step": 306 }, { "epoch": 2.725663716814159, "grad_norm": 52.633541107177734, "learning_rate": 6.065573770491804e-06, "loss": 1.0646, "step": 308 }, { "epoch": 2.7433628318584073, "grad_norm": 105.65243530273438, "learning_rate": 5.737704918032787e-06, "loss": 0.9322, 
"step": 310 }, { "epoch": 2.7610619469026547, "grad_norm": 23.349111557006836, "learning_rate": 5.409836065573771e-06, "loss": 1.0782, "step": 312 }, { "epoch": 2.7787610619469025, "grad_norm": 59.555641174316406, "learning_rate": 5.0819672131147545e-06, "loss": 0.8693, "step": 314 }, { "epoch": 2.7964601769911503, "grad_norm": 36.903785705566406, "learning_rate": 4.754098360655738e-06, "loss": 0.8108, "step": 316 }, { "epoch": 2.814159292035398, "grad_norm": 30.89893913269043, "learning_rate": 4.426229508196722e-06, "loss": 0.9355, "step": 318 }, { "epoch": 2.831858407079646, "grad_norm": 109.97364807128906, "learning_rate": 4.098360655737704e-06, "loss": 0.9326, "step": 320 }, { "epoch": 2.849557522123894, "grad_norm": 38.92977523803711, "learning_rate": 3.770491803278689e-06, "loss": 1.0016, "step": 322 }, { "epoch": 2.8672566371681416, "grad_norm": 28.991559982299805, "learning_rate": 3.4426229508196724e-06, "loss": 0.9364, "step": 324 }, { "epoch": 2.8849557522123894, "grad_norm": 49.69723892211914, "learning_rate": 3.114754098360656e-06, "loss": 1.056, "step": 326 }, { "epoch": 2.9026548672566372, "grad_norm": 50.34890365600586, "learning_rate": 2.7868852459016396e-06, "loss": 0.8493, "step": 328 }, { "epoch": 2.920353982300885, "grad_norm": 29.193132400512695, "learning_rate": 2.459016393442623e-06, "loss": 0.9151, "step": 330 }, { "epoch": 2.938053097345133, "grad_norm": 120.2429428100586, "learning_rate": 2.1311475409836067e-06, "loss": 0.8943, "step": 332 }, { "epoch": 2.9557522123893807, "grad_norm": 84.27303314208984, "learning_rate": 1.8032786885245903e-06, "loss": 0.9302, "step": 334 }, { "epoch": 2.9734513274336285, "grad_norm": 19.526248931884766, "learning_rate": 1.4754098360655739e-06, "loss": 0.8956, "step": 336 }, { "epoch": 2.991150442477876, "grad_norm": 51.6258430480957, "learning_rate": 1.1475409836065575e-06, "loss": 0.8992, "step": 338 }, { "epoch": 3.0, "eval_loss": 0.8856489658355713, "eval_map": 0.1595, "eval_map_50": 0.2552, 
"eval_map_75": 0.1847, "eval_map_large": 0.1961, "eval_map_medium": 0.0755, "eval_map_per_class": 0.1595, "eval_map_small": 0.0, "eval_mar_1": 0.2338, "eval_mar_10": 0.5162, "eval_mar_100": 0.6897, "eval_mar_100_per_class": 0.6897, "eval_mar_large": 0.7544, "eval_mar_medium": 0.5297, "eval_mar_small": 0.0, "eval_runtime": 6.7061, "eval_samples_per_second": 14.912, "eval_steps_per_second": 1.044, "step": 339 } ], "logging_steps": 2, "max_steps": 339, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.05586283531952e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }