{
  "best_metric": 0.8856489658355713,
  "best_model_checkpoint": "autotrain-1hkeo-o33ms/checkpoint-339",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 339,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017699115044247787,
      "grad_norm": NaN,
      "learning_rate": 0.0,
      "loss": 2.8516,
      "step": 2
    },
    {
      "epoch": 0.035398230088495575,
      "grad_norm": 106.12066650390625,
      "learning_rate": 1.4705882352941177e-06,
      "loss": 2.7397,
      "step": 4
    },
    {
      "epoch": 0.05309734513274336,
      "grad_norm": 239.92628479003906,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 2.5233,
      "step": 6
    },
    {
      "epoch": 0.07079646017699115,
      "grad_norm": 219.21218872070312,
      "learning_rate": 5.882352941176471e-06,
      "loss": 2.7366,
      "step": 8
    },
    {
      "epoch": 0.08849557522123894,
      "grad_norm": 214.71096801757812,
      "learning_rate": 8.823529411764707e-06,
      "loss": 2.6004,
      "step": 10
    },
    {
      "epoch": 0.10619469026548672,
      "grad_norm": Infinity,
      "learning_rate": 8.823529411764707e-06,
      "loss": 2.4951,
      "step": 12
    },
    {
      "epoch": 0.12389380530973451,
      "grad_norm": 320.1042785644531,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 1.8943,
      "step": 14
    },
    {
      "epoch": 0.1415929203539823,
      "grad_norm": 1234.984619140625,
      "learning_rate": 1.4705882352941177e-05,
      "loss": 2.1298,
      "step": 16
    },
    {
      "epoch": 0.1592920353982301,
      "grad_norm": 85.23419189453125,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 1.978,
      "step": 18
    },
    {
      "epoch": 0.17699115044247787,
      "grad_norm": 148.04153442382812,
      "learning_rate": 2.058823529411765e-05,
      "loss": 1.9334,
      "step": 20
    },
    {
      "epoch": 0.19469026548672566,
      "grad_norm": 70.3712387084961,
      "learning_rate": 2.3529411764705884e-05,
      "loss": 1.9477,
      "step": 22
    },
    {
      "epoch": 0.21238938053097345,
      "grad_norm": 279.7557678222656,
      "learning_rate": 2.647058823529412e-05,
      "loss": 1.8483,
      "step": 24
    },
    {
      "epoch": 0.23008849557522124,
      "grad_norm": 294.1581115722656,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 1.5582,
      "step": 26
    },
    {
      "epoch": 0.24778761061946902,
      "grad_norm": 110.91650390625,
      "learning_rate": 3.235294117647059e-05,
      "loss": 1.5439,
      "step": 28
    },
    {
      "epoch": 0.26548672566371684,
      "grad_norm": 68.54044342041016,
      "learning_rate": 3.529411764705883e-05,
      "loss": 1.5531,
      "step": 30
    },
    {
      "epoch": 0.2831858407079646,
      "grad_norm": 67.14613342285156,
      "learning_rate": 3.8235294117647055e-05,
      "loss": 1.5641,
      "step": 32
    },
    {
      "epoch": 0.3008849557522124,
      "grad_norm": 73.9820785522461,
      "learning_rate": 4.11764705882353e-05,
      "loss": 1.687,
      "step": 34
    },
    {
      "epoch": 0.3185840707964602,
      "grad_norm": 38.29616928100586,
      "learning_rate": 4.411764705882353e-05,
      "loss": 1.4187,
      "step": 36
    },
    {
      "epoch": 0.336283185840708,
      "grad_norm": 84.34982299804688,
      "learning_rate": 4.705882352941177e-05,
      "loss": 1.3998,
      "step": 38
    },
    {
      "epoch": 0.35398230088495575,
      "grad_norm": 188.01791381835938,
      "learning_rate": 5e-05,
      "loss": 1.477,
      "step": 40
    },
    {
      "epoch": 0.37168141592920356,
      "grad_norm": 443.12078857421875,
      "learning_rate": 4.967213114754098e-05,
      "loss": 1.4343,
      "step": 42
    },
    {
      "epoch": 0.3893805309734513,
      "grad_norm": 70.84330749511719,
      "learning_rate": 4.934426229508197e-05,
      "loss": 1.4312,
      "step": 44
    },
    {
      "epoch": 0.40707964601769914,
      "grad_norm": 151.94332885742188,
      "learning_rate": 4.9016393442622957e-05,
      "loss": 1.5684,
      "step": 46
    },
    {
      "epoch": 0.4247787610619469,
      "grad_norm": 44.81299591064453,
      "learning_rate": 4.868852459016394e-05,
      "loss": 1.6841,
      "step": 48
    },
    {
      "epoch": 0.4424778761061947,
      "grad_norm": 212.30218505859375,
      "learning_rate": 4.836065573770492e-05,
      "loss": 1.3116,
      "step": 50
    },
    {
      "epoch": 0.46017699115044247,
      "grad_norm": 146.485595703125,
      "learning_rate": 4.8032786885245904e-05,
      "loss": 1.5651,
      "step": 52
    },
    {
      "epoch": 0.4778761061946903,
      "grad_norm": 56.38231658935547,
      "learning_rate": 4.770491803278689e-05,
      "loss": 1.611,
      "step": 54
    },
    {
      "epoch": 0.49557522123893805,
      "grad_norm": 168.2674102783203,
      "learning_rate": 4.737704918032787e-05,
      "loss": 1.5692,
      "step": 56
    },
    {
      "epoch": 0.5132743362831859,
      "grad_norm": 62.2509765625,
      "learning_rate": 4.704918032786885e-05,
      "loss": 1.8457,
      "step": 58
    },
    {
      "epoch": 0.5309734513274337,
      "grad_norm": 272.4936828613281,
      "learning_rate": 4.672131147540984e-05,
      "loss": 1.5168,
      "step": 60
    },
    {
      "epoch": 0.5486725663716814,
      "grad_norm": 45.84150695800781,
      "learning_rate": 4.6393442622950825e-05,
      "loss": 1.3566,
      "step": 62
    },
    {
      "epoch": 0.5663716814159292,
      "grad_norm": 78.61536407470703,
      "learning_rate": 4.6065573770491805e-05,
      "loss": 1.2089,
      "step": 64
    },
    {
      "epoch": 0.584070796460177,
      "grad_norm": 65.8510513305664,
      "learning_rate": 4.5737704918032786e-05,
      "loss": 1.2913,
      "step": 66
    },
    {
      "epoch": 0.6017699115044248,
      "grad_norm": 90.05781555175781,
      "learning_rate": 4.540983606557377e-05,
      "loss": 1.193,
      "step": 68
    },
    {
      "epoch": 0.6194690265486725,
      "grad_norm": 87.59015655517578,
      "learning_rate": 4.508196721311476e-05,
      "loss": 1.3291,
      "step": 70
    },
    {
      "epoch": 0.6371681415929203,
      "grad_norm": 52.730377197265625,
      "learning_rate": 4.475409836065574e-05,
      "loss": 1.2274,
      "step": 72
    },
    {
      "epoch": 0.6548672566371682,
      "grad_norm": 39.25498962402344,
      "learning_rate": 4.442622950819673e-05,
      "loss": 1.2745,
      "step": 74
    },
    {
      "epoch": 0.672566371681416,
      "grad_norm": 31.809101104736328,
      "learning_rate": 4.409836065573771e-05,
      "loss": 1.1752,
      "step": 76
    },
    {
      "epoch": 0.6902654867256637,
      "grad_norm": 48.21644592285156,
      "learning_rate": 4.377049180327869e-05,
      "loss": 1.313,
      "step": 78
    },
    {
      "epoch": 0.7079646017699115,
      "grad_norm": 37.171226501464844,
      "learning_rate": 4.3442622950819674e-05,
      "loss": 1.2738,
      "step": 80
    },
    {
      "epoch": 0.7256637168141593,
      "grad_norm": 129.79141235351562,
      "learning_rate": 4.311475409836066e-05,
      "loss": 1.1686,
      "step": 82
    },
    {
      "epoch": 0.7433628318584071,
      "grad_norm": 77.16220092773438,
      "learning_rate": 4.278688524590164e-05,
      "loss": 1.3718,
      "step": 84
    },
    {
      "epoch": 0.7610619469026548,
      "grad_norm": 45.43793487548828,
      "learning_rate": 4.245901639344262e-05,
      "loss": 1.1993,
      "step": 86
    },
    {
      "epoch": 0.7787610619469026,
      "grad_norm": 130.69161987304688,
      "learning_rate": 4.213114754098361e-05,
      "loss": 1.052,
      "step": 88
    },
    {
      "epoch": 0.7964601769911505,
      "grad_norm": 42.368804931640625,
      "learning_rate": 4.1803278688524595e-05,
      "loss": 1.0408,
      "step": 90
    },
    {
      "epoch": 0.8141592920353983,
      "grad_norm": 42.31563949584961,
      "learning_rate": 4.1475409836065575e-05,
      "loss": 1.3483,
      "step": 92
    },
    {
      "epoch": 0.831858407079646,
      "grad_norm": 47.8664665222168,
      "learning_rate": 4.1147540983606556e-05,
      "loss": 1.2632,
      "step": 94
    },
    {
      "epoch": 0.8495575221238938,
      "grad_norm": 64.5959243774414,
      "learning_rate": 4.081967213114754e-05,
      "loss": 1.1907,
      "step": 96
    },
    {
      "epoch": 0.8672566371681416,
      "grad_norm": 260.2913513183594,
      "learning_rate": 4.049180327868853e-05,
      "loss": 1.1238,
      "step": 98
    },
    {
      "epoch": 0.8849557522123894,
      "grad_norm": 42.65099334716797,
      "learning_rate": 4.016393442622951e-05,
      "loss": 1.3226,
      "step": 100
    },
    {
      "epoch": 0.9026548672566371,
      "grad_norm": 42.4586181640625,
      "learning_rate": 3.983606557377049e-05,
      "loss": 1.2652,
      "step": 102
    },
    {
      "epoch": 0.9203539823008849,
      "grad_norm": 50.917659759521484,
      "learning_rate": 3.950819672131148e-05,
      "loss": 1.1128,
      "step": 104
    },
    {
      "epoch": 0.9380530973451328,
      "grad_norm": 90.57892608642578,
      "learning_rate": 3.9180327868852464e-05,
      "loss": 1.5618,
      "step": 106
    },
    {
      "epoch": 0.9557522123893806,
      "grad_norm": 126.1560287475586,
      "learning_rate": 3.8852459016393444e-05,
      "loss": 1.5922,
      "step": 108
    },
    {
      "epoch": 0.9734513274336283,
      "grad_norm": 45.01774978637695,
      "learning_rate": 3.8524590163934424e-05,
      "loss": 1.4205,
      "step": 110
    },
    {
      "epoch": 0.9911504424778761,
      "grad_norm": 49.559791564941406,
      "learning_rate": 3.819672131147541e-05,
      "loss": 1.2759,
      "step": 112
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.0817503929138184,
      "eval_map": 0.0905,
      "eval_map_50": 0.1567,
      "eval_map_75": 0.0974,
      "eval_map_large": 0.1083,
      "eval_map_medium": 0.0595,
      "eval_map_per_class": 0.0905,
      "eval_map_small": 0.0,
      "eval_mar_1": 0.2077,
      "eval_mar_10": 0.3979,
      "eval_mar_100": 0.6491,
      "eval_mar_100_per_class": 0.6491,
      "eval_mar_large": 0.7243,
      "eval_mar_medium": 0.4609,
      "eval_mar_small": 0.0,
      "eval_runtime": 6.9741,
      "eval_samples_per_second": 14.339,
      "eval_steps_per_second": 1.004,
      "step": 113
    },
    {
      "epoch": 1.008849557522124,
      "grad_norm": 55.82676315307617,
      "learning_rate": 3.78688524590164e-05,
      "loss": 1.3204,
      "step": 114
    },
    {
      "epoch": 1.0265486725663717,
      "grad_norm": 56.670345306396484,
      "learning_rate": 3.754098360655738e-05,
      "loss": 1.0837,
      "step": 116
    },
    {
      "epoch": 1.0442477876106195,
      "grad_norm": 166.82949829101562,
      "learning_rate": 3.721311475409836e-05,
      "loss": 1.1722,
      "step": 118
    },
    {
      "epoch": 1.0619469026548674,
      "grad_norm": 83.52169036865234,
      "learning_rate": 3.6885245901639346e-05,
      "loss": 1.2194,
      "step": 120
    },
    {
      "epoch": 1.079646017699115,
      "grad_norm": 56.17623519897461,
      "learning_rate": 3.655737704918033e-05,
      "loss": 1.1828,
      "step": 122
    },
    {
      "epoch": 1.0973451327433628,
      "grad_norm": 24.509546279907227,
      "learning_rate": 3.622950819672131e-05,
      "loss": 1.009,
      "step": 124
    },
    {
      "epoch": 1.1150442477876106,
      "grad_norm": 80.25348663330078,
      "learning_rate": 3.590163934426229e-05,
      "loss": 1.1667,
      "step": 126
    },
    {
      "epoch": 1.1327433628318584,
      "grad_norm": 98.71533966064453,
      "learning_rate": 3.557377049180328e-05,
      "loss": 1.1607,
      "step": 128
    },
    {
      "epoch": 1.1504424778761062,
      "grad_norm": 42.726444244384766,
      "learning_rate": 3.524590163934427e-05,
      "loss": 1.0344,
      "step": 130
    },
    {
      "epoch": 1.168141592920354,
      "grad_norm": 47.48421859741211,
      "learning_rate": 3.491803278688525e-05,
      "loss": 1.1513,
      "step": 132
    },
    {
      "epoch": 1.1858407079646018,
      "grad_norm": 38.87162780761719,
      "learning_rate": 3.459016393442623e-05,
      "loss": 1.0672,
      "step": 134
    },
    {
      "epoch": 1.2035398230088497,
      "grad_norm": 30.703020095825195,
      "learning_rate": 3.4262295081967214e-05,
      "loss": 1.2524,
      "step": 136
    },
    {
      "epoch": 1.2212389380530975,
      "grad_norm": 62.74378967285156,
      "learning_rate": 3.39344262295082e-05,
      "loss": 1.0351,
      "step": 138
    },
    {
      "epoch": 1.238938053097345,
      "grad_norm": 32.68797302246094,
      "learning_rate": 3.360655737704918e-05,
      "loss": 1.0135,
      "step": 140
    },
    {
      "epoch": 1.2566371681415929,
      "grad_norm": 260.8392639160156,
      "learning_rate": 3.327868852459017e-05,
      "loss": 1.1592,
      "step": 142
    },
    {
      "epoch": 1.2743362831858407,
      "grad_norm": 102.03837585449219,
      "learning_rate": 3.295081967213115e-05,
      "loss": 1.0648,
      "step": 144
    },
    {
      "epoch": 1.2920353982300885,
      "grad_norm": 50.19871520996094,
      "learning_rate": 3.2622950819672136e-05,
      "loss": 1.0932,
      "step": 146
    },
    {
      "epoch": 1.3097345132743363,
      "grad_norm": 108.73077392578125,
      "learning_rate": 3.2295081967213116e-05,
      "loss": 1.0388,
      "step": 148
    },
    {
      "epoch": 1.3274336283185841,
      "grad_norm": 166.00540161132812,
      "learning_rate": 3.19672131147541e-05,
      "loss": 1.189,
      "step": 150
    },
    {
      "epoch": 1.3451327433628317,
      "grad_norm": 29.591773986816406,
      "learning_rate": 3.163934426229508e-05,
      "loss": 0.9697,
      "step": 152
    },
    {
      "epoch": 1.3628318584070795,
      "grad_norm": 96.86643981933594,
      "learning_rate": 3.131147540983606e-05,
      "loss": 1.1525,
      "step": 154
    },
    {
      "epoch": 1.3805309734513274,
      "grad_norm": 52.641666412353516,
      "learning_rate": 3.098360655737705e-05,
      "loss": 1.1335,
      "step": 156
    },
    {
      "epoch": 1.3982300884955752,
      "grad_norm": 34.866493225097656,
      "learning_rate": 3.065573770491804e-05,
      "loss": 1.285,
      "step": 158
    },
    {
      "epoch": 1.415929203539823,
      "grad_norm": 23.98964500427246,
      "learning_rate": 3.0327868852459017e-05,
      "loss": 1.2042,
      "step": 160
    },
    {
      "epoch": 1.4336283185840708,
      "grad_norm": 145.60592651367188,
      "learning_rate": 3e-05,
      "loss": 1.29,
      "step": 162
    },
    {
      "epoch": 1.4513274336283186,
      "grad_norm": 53.379520416259766,
      "learning_rate": 2.967213114754098e-05,
      "loss": 1.1392,
      "step": 164
    },
    {
      "epoch": 1.4690265486725664,
      "grad_norm": 60.14817428588867,
      "learning_rate": 2.934426229508197e-05,
      "loss": 1.2606,
      "step": 166
    },
    {
      "epoch": 1.4867256637168142,
      "grad_norm": 31.738466262817383,
      "learning_rate": 2.901639344262295e-05,
      "loss": 1.1376,
      "step": 168
    },
    {
      "epoch": 1.504424778761062,
      "grad_norm": 38.88958740234375,
      "learning_rate": 2.8688524590163935e-05,
      "loss": 1.1329,
      "step": 170
    },
    {
      "epoch": 1.5221238938053099,
      "grad_norm": 61.10131072998047,
      "learning_rate": 2.8360655737704922e-05,
      "loss": 1.0537,
      "step": 172
    },
    {
      "epoch": 1.5398230088495575,
      "grad_norm": 43.02183532714844,
      "learning_rate": 2.8032786885245906e-05,
      "loss": 1.1499,
      "step": 174
    },
    {
      "epoch": 1.5575221238938053,
      "grad_norm": 37.601444244384766,
      "learning_rate": 2.7704918032786886e-05,
      "loss": 1.3708,
      "step": 176
    },
    {
      "epoch": 1.575221238938053,
      "grad_norm": 29.7570743560791,
      "learning_rate": 2.737704918032787e-05,
      "loss": 1.138,
      "step": 178
    },
    {
      "epoch": 1.592920353982301,
      "grad_norm": 94.0286865234375,
      "learning_rate": 2.7049180327868856e-05,
      "loss": 1.0933,
      "step": 180
    },
    {
      "epoch": 1.6106194690265485,
      "grad_norm": 38.72492599487305,
      "learning_rate": 2.6721311475409837e-05,
      "loss": 0.9599,
      "step": 182
    },
    {
      "epoch": 1.6283185840707963,
      "grad_norm": 30.70404624938965,
      "learning_rate": 2.639344262295082e-05,
      "loss": 1.0226,
      "step": 184
    },
    {
      "epoch": 1.6460176991150441,
      "grad_norm": 57.246028900146484,
      "learning_rate": 2.6065573770491804e-05,
      "loss": 0.9137,
      "step": 186
    },
    {
      "epoch": 1.663716814159292,
      "grad_norm": 30.864009857177734,
      "learning_rate": 2.573770491803279e-05,
      "loss": 1.0321,
      "step": 188
    },
    {
      "epoch": 1.6814159292035398,
      "grad_norm": 21.78902816772461,
      "learning_rate": 2.540983606557377e-05,
      "loss": 1.1898,
      "step": 190
    },
    {
      "epoch": 1.6991150442477876,
      "grad_norm": 50.869197845458984,
      "learning_rate": 2.5081967213114754e-05,
      "loss": 1.2291,
      "step": 192
    },
    {
      "epoch": 1.7168141592920354,
      "grad_norm": 81.57111358642578,
      "learning_rate": 2.4754098360655738e-05,
      "loss": 1.1021,
      "step": 194
    },
    {
      "epoch": 1.7345132743362832,
      "grad_norm": 28.92418098449707,
      "learning_rate": 2.442622950819672e-05,
      "loss": 0.9841,
      "step": 196
    },
    {
      "epoch": 1.752212389380531,
      "grad_norm": 23.563941955566406,
      "learning_rate": 2.4098360655737705e-05,
      "loss": 1.4778,
      "step": 198
    },
    {
      "epoch": 1.7699115044247788,
      "grad_norm": 39.69824981689453,
      "learning_rate": 2.377049180327869e-05,
      "loss": 1.0115,
      "step": 200
    },
    {
      "epoch": 1.7876106194690267,
      "grad_norm": 503.5774841308594,
      "learning_rate": 2.3442622950819672e-05,
      "loss": 1.3526,
      "step": 202
    },
    {
      "epoch": 1.8053097345132745,
      "grad_norm": 47.411170959472656,
      "learning_rate": 2.311475409836066e-05,
      "loss": 1.0035,
      "step": 204
    },
    {
      "epoch": 1.823008849557522,
      "grad_norm": 60.6224479675293,
      "learning_rate": 2.278688524590164e-05,
      "loss": 1.2126,
      "step": 206
    },
    {
      "epoch": 1.8407079646017699,
      "grad_norm": 51.3713264465332,
      "learning_rate": 2.2459016393442626e-05,
      "loss": 1.2112,
      "step": 208
    },
    {
      "epoch": 1.8584070796460177,
      "grad_norm": 43.3449821472168,
      "learning_rate": 2.2131147540983607e-05,
      "loss": 1.1825,
      "step": 210
    },
    {
      "epoch": 1.8761061946902655,
      "grad_norm": 51.22021484375,
      "learning_rate": 2.1803278688524594e-05,
      "loss": 1.0688,
      "step": 212
    },
    {
      "epoch": 1.893805309734513,
      "grad_norm": 47.7821044921875,
      "learning_rate": 2.1475409836065574e-05,
      "loss": 1.1192,
      "step": 214
    },
    {
      "epoch": 1.911504424778761,
      "grad_norm": 26.08221435546875,
      "learning_rate": 2.114754098360656e-05,
      "loss": 0.9688,
      "step": 216
    },
    {
      "epoch": 1.9292035398230087,
      "grad_norm": 35.32424545288086,
      "learning_rate": 2.081967213114754e-05,
      "loss": 0.96,
      "step": 218
    },
    {
      "epoch": 1.9469026548672566,
      "grad_norm": 33.47419357299805,
      "learning_rate": 2.0491803278688525e-05,
      "loss": 1.237,
      "step": 220
    },
    {
      "epoch": 1.9646017699115044,
      "grad_norm": 54.51090621948242,
      "learning_rate": 2.0163934426229508e-05,
      "loss": 1.0218,
      "step": 222
    },
    {
      "epoch": 1.9823008849557522,
      "grad_norm": 46.36600875854492,
      "learning_rate": 1.9836065573770492e-05,
      "loss": 1.0245,
      "step": 224
    },
    {
      "epoch": 2.0,
      "grad_norm": 39.30439758300781,
      "learning_rate": 1.9508196721311475e-05,
      "loss": 1.0934,
      "step": 226
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.2027697563171387,
      "eval_map": 0.186,
      "eval_map_50": 0.3347,
      "eval_map_75": 0.2021,
      "eval_map_large": 0.2317,
      "eval_map_medium": 0.1023,
      "eval_map_per_class": 0.186,
      "eval_map_small": 0.0,
      "eval_mar_1": 0.2299,
      "eval_mar_10": 0.4979,
      "eval_mar_100": 0.5718,
      "eval_mar_100_per_class": 0.5718,
      "eval_mar_large": 0.6669,
      "eval_mar_medium": 0.3297,
      "eval_mar_small": 0.0,
      "eval_runtime": 6.8411,
      "eval_samples_per_second": 14.617,
      "eval_steps_per_second": 1.023,
      "step": 226
    },
    {
      "epoch": 2.017699115044248,
      "grad_norm": 64.81815338134766,
      "learning_rate": 1.918032786885246e-05,
      "loss": 0.9753,
      "step": 228
    },
    {
      "epoch": 2.0353982300884956,
      "grad_norm": 33.4212532043457,
      "learning_rate": 1.8852459016393442e-05,
      "loss": 1.0836,
      "step": 230
    },
    {
      "epoch": 2.0530973451327434,
      "grad_norm": 43.58014678955078,
      "learning_rate": 1.8524590163934426e-05,
      "loss": 0.9376,
      "step": 232
    },
    {
      "epoch": 2.0707964601769913,
      "grad_norm": 49.71245574951172,
      "learning_rate": 1.8196721311475413e-05,
      "loss": 1.088,
      "step": 234
    },
    {
      "epoch": 2.088495575221239,
      "grad_norm": 37.22849655151367,
      "learning_rate": 1.7868852459016393e-05,
      "loss": 1.1392,
      "step": 236
    },
    {
      "epoch": 2.106194690265487,
      "grad_norm": 37.5290412902832,
      "learning_rate": 1.754098360655738e-05,
      "loss": 1.0028,
      "step": 238
    },
    {
      "epoch": 2.1238938053097347,
      "grad_norm": 39.413692474365234,
      "learning_rate": 1.721311475409836e-05,
      "loss": 1.0709,
      "step": 240
    },
    {
      "epoch": 2.1415929203539825,
      "grad_norm": 58.448463439941406,
      "learning_rate": 1.6885245901639347e-05,
      "loss": 1.1094,
      "step": 242
    },
    {
      "epoch": 2.15929203539823,
      "grad_norm": 49.975345611572266,
      "learning_rate": 1.6557377049180328e-05,
      "loss": 1.0425,
      "step": 244
    },
    {
      "epoch": 2.1769911504424777,
      "grad_norm": 40.28168487548828,
      "learning_rate": 1.6229508196721314e-05,
      "loss": 1.1666,
      "step": 246
    },
    {
      "epoch": 2.1946902654867255,
      "grad_norm": 123.18711853027344,
      "learning_rate": 1.5901639344262295e-05,
      "loss": 0.9255,
      "step": 248
    },
    {
      "epoch": 2.2123893805309733,
      "grad_norm": 35.56529998779297,
      "learning_rate": 1.557377049180328e-05,
      "loss": 0.9732,
      "step": 250
    },
    {
      "epoch": 2.230088495575221,
      "grad_norm": 48.11013412475586,
      "learning_rate": 1.5245901639344262e-05,
      "loss": 0.9164,
      "step": 252
    },
    {
      "epoch": 2.247787610619469,
      "grad_norm": 33.767940521240234,
      "learning_rate": 1.4918032786885247e-05,
      "loss": 0.9673,
      "step": 254
    },
    {
      "epoch": 2.265486725663717,
      "grad_norm": 105.49449157714844,
      "learning_rate": 1.4590163934426229e-05,
      "loss": 0.9585,
      "step": 256
    },
    {
      "epoch": 2.2831858407079646,
      "grad_norm": 77.859619140625,
      "learning_rate": 1.4262295081967214e-05,
      "loss": 0.9652,
      "step": 258
    },
    {
      "epoch": 2.3008849557522124,
      "grad_norm": 30.470129013061523,
      "learning_rate": 1.3934426229508196e-05,
      "loss": 0.9409,
      "step": 260
    },
    {
      "epoch": 2.3185840707964602,
      "grad_norm": 25.803863525390625,
      "learning_rate": 1.3606557377049181e-05,
      "loss": 0.8861,
      "step": 262
    },
    {
      "epoch": 2.336283185840708,
      "grad_norm": 19.344310760498047,
      "learning_rate": 1.3278688524590163e-05,
      "loss": 1.0303,
      "step": 264
    },
    {
      "epoch": 2.353982300884956,
      "grad_norm": 30.006305694580078,
      "learning_rate": 1.2950819672131149e-05,
      "loss": 1.0346,
      "step": 266
    },
    {
      "epoch": 2.3716814159292037,
      "grad_norm": 53.978092193603516,
      "learning_rate": 1.2622950819672132e-05,
      "loss": 0.9385,
      "step": 268
    },
    {
      "epoch": 2.3893805309734515,
      "grad_norm": 40.302066802978516,
      "learning_rate": 1.2295081967213116e-05,
      "loss": 0.9354,
      "step": 270
    },
    {
      "epoch": 2.4070796460176993,
      "grad_norm": 41.98043441772461,
      "learning_rate": 1.19672131147541e-05,
      "loss": 0.9648,
      "step": 272
    },
    {
      "epoch": 2.4247787610619467,
      "grad_norm": 18.923696517944336,
      "learning_rate": 1.1639344262295083e-05,
      "loss": 0.8274,
      "step": 274
    },
    {
      "epoch": 2.442477876106195,
      "grad_norm": 115.89277648925781,
      "learning_rate": 1.1311475409836065e-05,
      "loss": 0.905,
      "step": 276
    },
    {
      "epoch": 2.4601769911504423,
      "grad_norm": 45.129112243652344,
      "learning_rate": 1.0983606557377048e-05,
      "loss": 1.116,
      "step": 278
    },
    {
      "epoch": 2.47787610619469,
      "grad_norm": 40.201717376708984,
      "learning_rate": 1.0655737704918032e-05,
      "loss": 0.964,
      "step": 280
    },
    {
      "epoch": 2.495575221238938,
      "grad_norm": 53.22237777709961,
      "learning_rate": 1.0327868852459017e-05,
      "loss": 0.9607,
      "step": 282
    },
    {
      "epoch": 2.5132743362831858,
      "grad_norm": 49.16684341430664,
      "learning_rate": 1e-05,
      "loss": 1.0343,
      "step": 284
    },
    {
      "epoch": 2.5309734513274336,
      "grad_norm": 99.20750427246094,
      "learning_rate": 9.672131147540984e-06,
      "loss": 1.122,
      "step": 286
    },
    {
      "epoch": 2.5486725663716814,
      "grad_norm": 24.961946487426758,
      "learning_rate": 9.344262295081968e-06,
      "loss": 0.9803,
      "step": 288
    },
    {
      "epoch": 2.566371681415929,
      "grad_norm": 37.47533416748047,
      "learning_rate": 9.016393442622952e-06,
      "loss": 1.0125,
      "step": 290
    },
    {
      "epoch": 2.584070796460177,
      "grad_norm": 25.763242721557617,
      "learning_rate": 8.688524590163935e-06,
      "loss": 0.9614,
      "step": 292
    },
    {
      "epoch": 2.601769911504425,
      "grad_norm": 25.674652099609375,
      "learning_rate": 8.360655737704919e-06,
      "loss": 0.8985,
      "step": 294
    },
    {
      "epoch": 2.6194690265486726,
      "grad_norm": 26.32975196838379,
      "learning_rate": 8.032786885245902e-06,
      "loss": 0.9328,
      "step": 296
    },
    {
      "epoch": 2.6371681415929205,
      "grad_norm": 37.20893096923828,
      "learning_rate": 7.704918032786886e-06,
      "loss": 0.8681,
      "step": 298
    },
    {
      "epoch": 2.6548672566371683,
      "grad_norm": 21.895004272460938,
      "learning_rate": 7.3770491803278695e-06,
      "loss": 1.0877,
      "step": 300
    },
    {
      "epoch": 2.672566371681416,
      "grad_norm": 433.6007080078125,
      "learning_rate": 7.049180327868852e-06,
      "loss": 1.02,
      "step": 302
    },
    {
      "epoch": 2.6902654867256635,
      "grad_norm": 51.63364028930664,
      "learning_rate": 6.721311475409836e-06,
      "loss": 0.9108,
      "step": 304
    },
    {
      "epoch": 2.7079646017699117,
      "grad_norm": 31.832887649536133,
      "learning_rate": 6.393442622950819e-06,
      "loss": 1.2122,
      "step": 306
    },
    {
      "epoch": 2.725663716814159,
      "grad_norm": 52.633541107177734,
      "learning_rate": 6.065573770491804e-06,
      "loss": 1.0646,
      "step": 308
    },
    {
      "epoch": 2.7433628318584073,
      "grad_norm": 105.65243530273438,
      "learning_rate": 5.737704918032787e-06,
      "loss": 0.9322,
      "step": 310
    },
    {
      "epoch": 2.7610619469026547,
      "grad_norm": 23.349111557006836,
      "learning_rate": 5.409836065573771e-06,
      "loss": 1.0782,
      "step": 312
    },
    {
      "epoch": 2.7787610619469025,
      "grad_norm": 59.555641174316406,
      "learning_rate": 5.0819672131147545e-06,
      "loss": 0.8693,
      "step": 314
    },
    {
      "epoch": 2.7964601769911503,
      "grad_norm": 36.903785705566406,
      "learning_rate": 4.754098360655738e-06,
      "loss": 0.8108,
      "step": 316
    },
    {
      "epoch": 2.814159292035398,
      "grad_norm": 30.89893913269043,
      "learning_rate": 4.426229508196722e-06,
      "loss": 0.9355,
      "step": 318
    },
    {
      "epoch": 2.831858407079646,
      "grad_norm": 109.97364807128906,
      "learning_rate": 4.098360655737704e-06,
      "loss": 0.9326,
      "step": 320
    },
    {
      "epoch": 2.849557522123894,
      "grad_norm": 38.92977523803711,
      "learning_rate": 3.770491803278689e-06,
      "loss": 1.0016,
      "step": 322
    },
    {
      "epoch": 2.8672566371681416,
      "grad_norm": 28.991559982299805,
      "learning_rate": 3.4426229508196724e-06,
      "loss": 0.9364,
      "step": 324
    },
    {
      "epoch": 2.8849557522123894,
      "grad_norm": 49.69723892211914,
      "learning_rate": 3.114754098360656e-06,
      "loss": 1.056,
      "step": 326
    },
    {
      "epoch": 2.9026548672566372,
      "grad_norm": 50.34890365600586,
      "learning_rate": 2.7868852459016396e-06,
      "loss": 0.8493,
      "step": 328
    },
    {
      "epoch": 2.920353982300885,
      "grad_norm": 29.193132400512695,
      "learning_rate": 2.459016393442623e-06,
      "loss": 0.9151,
      "step": 330
    },
    {
      "epoch": 2.938053097345133,
      "grad_norm": 120.2429428100586,
      "learning_rate": 2.1311475409836067e-06,
      "loss": 0.8943,
      "step": 332
    },
    {
      "epoch": 2.9557522123893807,
      "grad_norm": 84.27303314208984,
      "learning_rate": 1.8032786885245903e-06,
      "loss": 0.9302,
      "step": 334
    },
    {
      "epoch": 2.9734513274336285,
      "grad_norm": 19.526248931884766,
      "learning_rate": 1.4754098360655739e-06,
      "loss": 0.8956,
      "step": 336
    },
    {
      "epoch": 2.991150442477876,
      "grad_norm": 51.6258430480957,
      "learning_rate": 1.1475409836065575e-06,
      "loss": 0.8992,
      "step": 338
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.8856489658355713,
      "eval_map": 0.1595,
      "eval_map_50": 0.2552,
      "eval_map_75": 0.1847,
      "eval_map_large": 0.1961,
      "eval_map_medium": 0.0755,
      "eval_map_per_class": 0.1595,
      "eval_map_small": 0.0,
      "eval_mar_1": 0.2338,
      "eval_mar_10": 0.5162,
      "eval_mar_100": 0.6897,
      "eval_mar_100_per_class": 0.6897,
      "eval_mar_large": 0.7544,
      "eval_mar_medium": 0.5297,
      "eval_mar_small": 0.0,
      "eval_runtime": 6.7061,
      "eval_samples_per_second": 14.912,
      "eval_steps_per_second": 1.044,
      "step": 339
    }
  ],
  "logging_steps": 2,
  "max_steps": 339,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.05586283531952e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}