{ "best_metric": 0.881407804131599, "best_model_checkpoint": "distilbert-base-multilingual-cased-aoe-hyper/run-4/checkpoint-1308", "epoch": 2.0, "eval_steps": 500, "global_step": 1308, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01529051987767584, "grad_norm": 3.7934694290161133, "learning_rate": 6.268194604781005e-06, "loss": 0.6321, "step": 10 }, { "epoch": 0.03058103975535168, "grad_norm": 2.5848259925842285, "learning_rate": 6.244141670764194e-06, "loss": 0.6228, "step": 20 }, { "epoch": 0.045871559633027525, "grad_norm": 2.499722957611084, "learning_rate": 6.220088736747383e-06, "loss": 0.5166, "step": 30 }, { "epoch": 0.06116207951070336, "grad_norm": 3.433131456375122, "learning_rate": 6.1960358027305715e-06, "loss": 0.5895, "step": 40 }, { "epoch": 0.0764525993883792, "grad_norm": 2.0335094928741455, "learning_rate": 6.17198286871376e-06, "loss": 0.5482, "step": 50 }, { "epoch": 0.09174311926605505, "grad_norm": 2.239384174346924, "learning_rate": 6.1479299346969484e-06, "loss": 0.5125, "step": 60 }, { "epoch": 0.10703363914373089, "grad_norm": 2.9248292446136475, "learning_rate": 6.123877000680137e-06, "loss": 0.4949, "step": 70 }, { "epoch": 0.12232415902140673, "grad_norm": 2.9950616359710693, "learning_rate": 6.099824066663326e-06, "loss": 0.4823, "step": 80 }, { "epoch": 0.13761467889908258, "grad_norm": 3.120652914047241, "learning_rate": 6.075771132646515e-06, "loss": 0.3806, "step": 90 }, { "epoch": 0.1529051987767584, "grad_norm": 2.4965806007385254, "learning_rate": 6.051718198629704e-06, "loss": 0.4585, "step": 100 }, { "epoch": 0.16819571865443425, "grad_norm": 2.0298407077789307, "learning_rate": 6.027665264612893e-06, "loss": 0.288, "step": 110 }, { "epoch": 0.1834862385321101, "grad_norm": 2.880314350128174, "learning_rate": 6.003612330596082e-06, "loss": 0.4147, "step": 120 }, { "epoch": 0.19877675840978593, "grad_norm": 3.3571255207061768, "learning_rate": 5.97955939657927e-06, "loss": 0.4601, "step": 130 }, { "epoch": 0.21406727828746178, "grad_norm": 7.866518020629883, "learning_rate": 5.955506462562459e-06, "loss": 0.4556, "step": 140 }, { "epoch": 0.22935779816513763, "grad_norm": 5.3301472663879395, "learning_rate": 5.9314535285456475e-06, "loss": 0.473, "step": 150 }, { "epoch": 0.24464831804281345, "grad_norm": 3.0050952434539795, "learning_rate": 5.907400594528836e-06, "loss": 0.383, "step": 160 }, { "epoch": 0.2599388379204893, "grad_norm": 8.946796417236328, "learning_rate": 5.883347660512025e-06, "loss": 0.3306, "step": 170 }, { "epoch": 0.27522935779816515, "grad_norm": 12.456232070922852, "learning_rate": 5.859294726495214e-06, "loss": 0.3614, "step": 180 }, { "epoch": 0.290519877675841, "grad_norm": 20.59422492980957, "learning_rate": 5.835241792478403e-06, "loss": 0.4216, "step": 190 }, { "epoch": 0.3058103975535168, "grad_norm": 3.627934694290161, "learning_rate": 5.811188858461592e-06, "loss": 0.451, "step": 200 }, { "epoch": 0.3211009174311927, "grad_norm": 4.5968852043151855, "learning_rate": 5.78713592444478e-06, "loss": 0.3596, "step": 210 }, { "epoch": 0.3363914373088685, "grad_norm": 8.742961883544922, "learning_rate": 5.763082990427969e-06, "loss": 0.4125, "step": 220 }, { "epoch": 0.3516819571865443, "grad_norm": 4.3628644943237305, "learning_rate": 5.739030056411158e-06, "loss": 0.3956, "step": 230 }, { "epoch": 0.3669724770642202, "grad_norm": 6.9421210289001465, "learning_rate": 5.7149771223943465e-06, "loss": 0.4196, "step": 240 }, { "epoch": 0.382262996941896, "grad_norm": 5.0060906410217285, "learning_rate": 5.690924188377535e-06, "loss": 0.3415, "step": 250 }, { "epoch": 0.39755351681957185, "grad_norm": 3.756944417953491, "learning_rate": 5.666871254360724e-06, "loss": 0.3433, "step": 260 }, { "epoch": 0.41284403669724773, "grad_norm": 8.966843605041504, "learning_rate": 5.642818320343913e-06, "loss": 0.326, "step": 270 }, { "epoch": 0.42813455657492355, "grad_norm": 13.75231647491455, "learning_rate": 5.618765386327102e-06, "loss": 0.2412, "step": 280 }, { "epoch": 0.4434250764525994, "grad_norm": 9.244980812072754, "learning_rate": 5.594712452310291e-06, "loss": 0.3949, "step": 290 }, { "epoch": 0.45871559633027525, "grad_norm": 7.885373115539551, "learning_rate": 5.57065951829348e-06, "loss": 0.2693, "step": 300 }, { "epoch": 0.4740061162079511, "grad_norm": 14.435099601745605, "learning_rate": 5.546606584276669e-06, "loss": 0.4065, "step": 310 }, { "epoch": 0.4892966360856269, "grad_norm": 27.61042022705078, "learning_rate": 5.5225536502598575e-06, "loss": 0.3138, "step": 320 }, { "epoch": 0.5045871559633027, "grad_norm": 6.691371440887451, "learning_rate": 5.4985007162430455e-06, "loss": 0.2268, "step": 330 }, { "epoch": 0.5198776758409785, "grad_norm": 6.305394172668457, "learning_rate": 5.474447782226234e-06, "loss": 0.2565, "step": 340 }, { "epoch": 0.5351681957186545, "grad_norm": 5.162963390350342, "learning_rate": 5.450394848209423e-06, "loss": 0.3489, "step": 350 }, { "epoch": 0.5504587155963303, "grad_norm": 12.863113403320312, "learning_rate": 5.426341914192612e-06, "loss": 0.4205, "step": 360 }, { "epoch": 0.5657492354740061, "grad_norm": 8.720885276794434, "learning_rate": 5.402288980175801e-06, "loss": 0.2633, "step": 370 }, { "epoch": 0.581039755351682, "grad_norm": 1.3078076839447021, "learning_rate": 5.378236046158989e-06, "loss": 0.2705, "step": 380 }, { "epoch": 0.5963302752293578, "grad_norm": 19.819198608398438, "learning_rate": 5.354183112142178e-06, "loss": 0.3007, "step": 390 }, { "epoch": 0.6116207951070336, "grad_norm": 9.876558303833008, "learning_rate": 5.330130178125367e-06, "loss": 0.2945, "step": 400 }, { "epoch": 0.6269113149847095, "grad_norm": 4.9784770011901855, "learning_rate": 5.306077244108556e-06, "loss": 0.3746, "step": 410 }, { "epoch": 0.6422018348623854, "grad_norm": 21.62326431274414, "learning_rate": 5.2820243100917446e-06, "loss": 0.291, "step": 420 }, { "epoch": 0.6574923547400612, "grad_norm": 10.322495460510254, "learning_rate": 5.2579713760749334e-06, "loss": 0.2793, "step": 430 }, { "epoch": 0.672782874617737, "grad_norm": 27.78510856628418, "learning_rate": 5.233918442058122e-06, "loss": 0.4447, "step": 440 }, { "epoch": 0.6880733944954128, "grad_norm": 8.71343994140625, "learning_rate": 5.209865508041311e-06, "loss": 0.169, "step": 450 }, { "epoch": 0.7033639143730887, "grad_norm": 7.693337917327881, "learning_rate": 5.1858125740245e-06, "loss": 0.394, "step": 460 }, { "epoch": 0.7186544342507645, "grad_norm": 8.130308151245117, "learning_rate": 5.161759640007688e-06, "loss": 0.1776, "step": 470 }, { "epoch": 0.7339449541284404, "grad_norm": 2.7745859622955322, "learning_rate": 5.137706705990877e-06, "loss": 0.216, "step": 480 }, { "epoch": 0.7492354740061162, "grad_norm": 8.477354049682617, "learning_rate": 5.113653771974066e-06, "loss": 0.3386, "step": 490 }, { "epoch": 0.764525993883792, "grad_norm": 8.951611518859863, "learning_rate": 5.089600837957255e-06, "loss": 0.2797, "step": 500 }, { "epoch": 0.7798165137614679, "grad_norm": 11.43747615814209, "learning_rate": 5.065547903940444e-06, "loss": 0.4733, "step": 510 }, { "epoch": 0.7951070336391437, "grad_norm": 19.584590911865234, "learning_rate": 5.0414949699236325e-06, "loss": 0.4629, "step": 520 }, { "epoch": 0.8103975535168195, "grad_norm": 11.580733299255371, "learning_rate": 5.017442035906821e-06, "loss": 0.1946, "step": 530 }, { "epoch": 0.8256880733944955, "grad_norm": 3.2449963092803955, "learning_rate": 4.99338910189001e-06, "loss": 0.282, "step": 540 }, { "epoch": 0.8409785932721713, "grad_norm": 20.366369247436523, "learning_rate": 4.969336167873199e-06, "loss": 0.3433, "step": 550 }, { "epoch": 0.8562691131498471, "grad_norm": 9.700865745544434, "learning_rate": 4.945283233856388e-06, "loss": 0.3337, "step": 560 }, { "epoch": 0.8715596330275229, "grad_norm": 8.121973037719727, "learning_rate": 4.921230299839577e-06, "loss": 0.3658, "step": 570 }, { "epoch": 0.8868501529051988, "grad_norm": 8.352603912353516, "learning_rate": 4.897177365822766e-06, "loss": 0.4038, "step": 580 }, { "epoch": 0.9021406727828746, "grad_norm": 6.9203290939331055, "learning_rate": 4.873124431805954e-06, "loss": 0.3139, "step": 590 }, { "epoch": 0.9174311926605505, "grad_norm": 9.17638111114502, "learning_rate": 4.849071497789143e-06, "loss": 0.37, "step": 600 }, { "epoch": 0.9327217125382263, "grad_norm": 17.2983341217041, "learning_rate": 4.8250185637723315e-06, "loss": 0.3072, "step": 610 }, { "epoch": 0.9480122324159022, "grad_norm": 5.322959899902344, "learning_rate": 4.80096562975552e-06, "loss": 0.323, "step": 620 }, { "epoch": 0.963302752293578, "grad_norm": 19.547361373901367, "learning_rate": 4.776912695738708e-06, "loss": 0.4587, "step": 630 }, { "epoch": 0.9785932721712538, "grad_norm": 8.637187004089355, "learning_rate": 4.752859761721897e-06, "loss": 0.2532, "step": 640 }, { "epoch": 0.9938837920489296, "grad_norm": 11.7959566116333, "learning_rate": 4.728806827705086e-06, "loss": 0.2689, "step": 650 }, { "epoch": 1.0, "eval_accuracy": 0.8706962509563887, "eval_f1": 0.6932849364791288, "eval_loss": 0.3414114713668823, "eval_precision": 0.799163179916318, "eval_recall": 0.6121794871794872, "eval_runtime": 4.8917, "eval_samples_per_second": 267.189, "eval_steps_per_second": 16.763, "step": 654 }, { "epoch": 1.0091743119266054, "grad_norm": 2.8421857357025146, "learning_rate": 4.704753893688275e-06, "loss": 0.3789, "step": 660 }, { "epoch": 1.0244648318042813, "grad_norm": 8.888960838317871, "learning_rate": 4.680700959671464e-06, "loss": 0.2548, "step": 670 }, { "epoch": 1.039755351681957, "grad_norm": 6.103824615478516, "learning_rate": 4.656648025654653e-06, "loss": 0.3478, "step": 680 }, { "epoch": 1.0550458715596331, "grad_norm": 22.265275955200195, "learning_rate": 4.632595091637842e-06, "loss": 0.3016, "step": 690 }, { "epoch": 1.070336391437309, "grad_norm": 9.670002937316895, "learning_rate": 4.6085421576210305e-06, "loss": 0.2558, "step": 700 }, { "epoch": 1.0856269113149848, "grad_norm": 1.852484941482544, "learning_rate": 4.584489223604219e-06, "loss": 0.2439, "step": 710 }, { "epoch": 1.1009174311926606, "grad_norm": 2.102276563644409, "learning_rate": 4.560436289587408e-06, "loss": 0.2946, "step": 720 }, { "epoch": 1.1162079510703364, "grad_norm": 5.1781439781188965, "learning_rate": 4.536383355570596e-06, "loss": 0.2724, "step": 730 }, { "epoch": 1.1314984709480123, "grad_norm": 5.4545722007751465, "learning_rate": 4.512330421553785e-06, "loss": 0.2599, "step": 740 }, { "epoch": 1.146788990825688, "grad_norm": 10.079143524169922, "learning_rate": 4.488277487536974e-06, "loss": 0.2584, "step": 750 }, { "epoch": 1.162079510703364, "grad_norm": 4.362283229827881, "learning_rate": 4.464224553520163e-06, "loss": 0.2489, "step": 760 }, { "epoch": 1.1773700305810397, "grad_norm": 7.355106353759766, "learning_rate": 4.440171619503352e-06, "loss": 0.3536, "step": 770 }, { "epoch": 1.1926605504587156, "grad_norm": 2.0413646697998047, "learning_rate": 4.416118685486541e-06, "loss": 0.1874, "step": 780 }, { "epoch": 1.2079510703363914, "grad_norm": 2.6824581623077393, "learning_rate": 4.3920657514697295e-06, "loss": 0.2813, "step": 790 }, { "epoch": 1.2232415902140672, "grad_norm": 1.147255539894104, "learning_rate": 4.368012817452918e-06, "loss": 0.2321, "step": 800 }, { "epoch": 1.238532110091743, "grad_norm": 8.774001121520996, "learning_rate": 4.343959883436107e-06, "loss": 0.2953, "step": 810 }, { "epoch": 1.2538226299694188, "grad_norm": 25.203166961669922, "learning_rate": 4.319906949419296e-06, "loss": 0.266, "step": 820 }, { "epoch": 1.2691131498470947, "grad_norm": 1.5749539136886597, "learning_rate": 4.295854015402485e-06, "loss": 0.2924, "step": 830 }, { "epoch": 1.2844036697247707, "grad_norm": 15.727303504943848, "learning_rate": 4.271801081385674e-06, "loss": 0.2302, "step": 840 }, { "epoch": 1.2996941896024465, "grad_norm": 35.48811721801758, "learning_rate": 4.247748147368862e-06, "loss": 0.2971, "step": 850 }, { "epoch": 1.3149847094801224, "grad_norm": 16.261396408081055, "learning_rate": 4.223695213352051e-06, "loss": 0.3347, "step": 860 }, { "epoch": 1.3302752293577982, "grad_norm": 1.1547783613204956, "learning_rate": 4.19964227933524e-06, "loss": 0.1146, "step": 870 }, { "epoch": 1.345565749235474, "grad_norm": 23.506380081176758, "learning_rate": 4.1755893453184286e-06, "loss": 0.2384, "step": 880 }, { "epoch": 1.3608562691131498, "grad_norm": 9.628520011901855, "learning_rate": 4.151536411301617e-06, "loss": 0.3036, "step": 890 }, { "epoch": 1.3761467889908257, "grad_norm": 36.97438049316406, "learning_rate": 4.1274834772848055e-06, "loss": 0.2811, "step": 900 }, { "epoch": 1.3914373088685015, "grad_norm": 1.8695220947265625, "learning_rate": 4.103430543267994e-06, "loss": 0.2825, "step": 910 }, { "epoch": 1.4067278287461773, "grad_norm": 23.051288604736328, "learning_rate": 4.079377609251183e-06, "loss": 0.194, "step": 920 }, { "epoch": 1.4220183486238533, "grad_norm": 10.35480785369873, "learning_rate": 4.055324675234372e-06, "loss": 0.2528, "step": 930 }, { "epoch": 1.4373088685015292, "grad_norm": 4.950748443603516, "learning_rate": 4.031271741217561e-06, "loss": 0.3389, "step": 940 }, { "epoch": 1.452599388379205, "grad_norm": 0.8531694412231445, "learning_rate": 4.00721880720075e-06, "loss": 0.2006, "step": 950 }, { "epoch": 1.4678899082568808, "grad_norm": 5.678575038909912, "learning_rate": 3.983165873183939e-06, "loss": 0.1901, "step": 960 }, { "epoch": 1.4831804281345566, "grad_norm": 15.318537712097168, "learning_rate": 3.959112939167128e-06, "loss": 0.6674, "step": 970 }, { "epoch": 1.4984709480122325, "grad_norm": 17.652233123779297, "learning_rate": 3.9350600051503165e-06, "loss": 0.1986, "step": 980 }, { "epoch": 1.5137614678899083, "grad_norm": 0.26174286007881165, "learning_rate": 3.9110070711335045e-06, "loss": 0.267, "step": 990 }, { "epoch": 1.529051987767584, "grad_norm": 9.704527854919434, "learning_rate": 3.886954137116693e-06, "loss": 0.3589, "step": 1000 }, { "epoch": 1.54434250764526, "grad_norm": 22.791322708129883, "learning_rate": 3.862901203099882e-06, "loss": 0.3213, "step": 1010 }, { "epoch": 1.5596330275229358, "grad_norm": 7.6159281730651855, "learning_rate": 3.838848269083071e-06, "loss": 0.3337, "step": 1020 }, { "epoch": 1.5749235474006116, "grad_norm": 28.031082153320312, "learning_rate": 3.81479533506626e-06, "loss": 0.3376, "step": 1030 }, { "epoch": 1.5902140672782874, "grad_norm": 20.402673721313477, "learning_rate": 3.790742401049449e-06, "loss": 0.3356, "step": 1040 }, { "epoch": 1.6055045871559632, "grad_norm": 2.4617958068847656, "learning_rate": 3.7666894670326378e-06, "loss": 0.2407, "step": 1050 }, { "epoch": 1.620795107033639, "grad_norm": 25.68115234375, "learning_rate": 3.7426365330158266e-06, "loss": 0.2576, "step": 1060 }, { "epoch": 1.6360856269113149, "grad_norm": 0.6402398347854614, "learning_rate": 3.7185835989990155e-06, "loss": 0.2803, "step": 1070 }, { "epoch": 1.6513761467889907, "grad_norm": 2.904221296310425, "learning_rate": 3.694530664982204e-06, "loss": 0.2053, "step": 1080 }, { "epoch": 1.6666666666666665, "grad_norm": 29.700824737548828, "learning_rate": 3.670477730965393e-06, "loss": 0.463, "step": 1090 }, { "epoch": 1.6819571865443423, "grad_norm": 5.498286724090576, "learning_rate": 3.6464247969485817e-06, "loss": 0.2744, "step": 1100 }, { "epoch": 1.6972477064220184, "grad_norm": 15.300117492675781, "learning_rate": 3.6223718629317706e-06, "loss": 0.2365, "step": 1110 }, { "epoch": 1.7125382262996942, "grad_norm": 17.502981185913086, "learning_rate": 3.5983189289149595e-06, "loss": 0.5628, "step": 1120 }, { "epoch": 1.72782874617737, "grad_norm": 9.557957649230957, "learning_rate": 3.5742659948981483e-06, "loss": 0.2042, "step": 1130 }, { "epoch": 1.7431192660550459, "grad_norm": 13.202752113342285, "learning_rate": 3.5502130608813364e-06, "loss": 0.2895, "step": 1140 }, { "epoch": 1.7584097859327217, "grad_norm": 10.660998344421387, "learning_rate": 3.5261601268645252e-06, "loss": 0.2851, "step": 1150 }, { "epoch": 1.7737003058103975, "grad_norm": 38.87590408325195, "learning_rate": 3.5021071928477137e-06, "loss": 0.2375, "step": 1160 }, { "epoch": 1.7889908256880735, "grad_norm": 13.82657241821289, "learning_rate": 3.4780542588309026e-06, "loss": 0.3832, "step": 1170 }, { "epoch": 1.8042813455657494, "grad_norm": 28.141618728637695, "learning_rate": 3.4540013248140914e-06, "loss": 0.2534, "step": 1180 }, { "epoch": 1.8195718654434252, "grad_norm": 2.6695139408111572, "learning_rate": 3.4299483907972803e-06, "loss": 0.2649, "step": 1190 }, { "epoch": 1.834862385321101, "grad_norm": 1.363698959350586, "learning_rate": 3.405895456780469e-06, "loss": 0.2242, "step": 1200 }, { "epoch": 1.8501529051987768, "grad_norm": 28.01278305053711, "learning_rate": 3.381842522763658e-06, "loss": 0.3021, "step": 1210 }, { "epoch": 1.8654434250764527, "grad_norm": 31.1824951171875, "learning_rate": 3.3577895887468465e-06, "loss": 0.2872, "step": 1220 }, { "epoch": 1.8807339449541285, "grad_norm": 24.482133865356445, "learning_rate": 3.3337366547300354e-06, "loss": 0.3046, "step": 1230 }, { "epoch": 1.8960244648318043, "grad_norm": 17.4123592376709, "learning_rate": 3.3096837207132243e-06, "loss": 0.2265, "step": 1240 }, { "epoch": 1.9113149847094801, "grad_norm": 5.79738187789917, "learning_rate": 3.285630786696413e-06, "loss": 0.2958, "step": 1250 }, { "epoch": 1.926605504587156, "grad_norm": 12.928561210632324, "learning_rate": 3.261577852679602e-06, "loss": 0.3954, "step": 1260 }, { "epoch": 1.9418960244648318, "grad_norm": 0.2753223180770874, "learning_rate": 3.237524918662791e-06, "loss": 0.1626, "step": 1270 }, { "epoch": 1.9571865443425076, "grad_norm": 1.013130784034729, "learning_rate": 3.2134719846459793e-06, "loss": 0.3307, "step": 1280 }, { "epoch": 1.9724770642201834, "grad_norm": 8.603392601013184, "learning_rate": 3.1894190506291682e-06, "loss": 0.1127, "step": 1290 }, { "epoch": 1.9877675840978593, "grad_norm": 17.115859985351562, "learning_rate": 3.165366116612357e-06, "loss": 0.4232, "step": 1300 }, { "epoch": 2.0, "eval_accuracy": 0.881407804131599, "eval_f1": 0.7543581616481775, "eval_loss": 0.35450226068496704, "eval_precision": 0.7460815047021944, "eval_recall": 0.7628205128205128, "eval_runtime": 4.8961, "eval_samples_per_second": 266.945, "eval_steps_per_second": 16.748, "step": 1308 } ], "logging_steps": 10, "max_steps": 2616, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1377131076427776.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "learning_rate": 6.292247538797816e-06, "num_train_epochs": 4, "per_device_train_batch_size": 8, "seed": 16 } }