{ "best_metric": 0.22676756978034973, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.29027576197387517, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001451378809869376, "grad_norm": 0.11311399191617966, "learning_rate": 1.013e-05, "loss": 0.2249, "step": 1 }, { "epoch": 0.001451378809869376, "eval_loss": 0.27678582072257996, "eval_runtime": 142.3159, "eval_samples_per_second": 2.045, "eval_steps_per_second": 0.513, "step": 1 }, { "epoch": 0.002902757619738752, "grad_norm": 0.11226661503314972, "learning_rate": 2.026e-05, "loss": 0.4195, "step": 2 }, { "epoch": 0.0043541364296081275, "grad_norm": 0.1760484129190445, "learning_rate": 3.039e-05, "loss": 0.3752, "step": 3 }, { "epoch": 0.005805515239477504, "grad_norm": 0.13543817400932312, "learning_rate": 4.052e-05, "loss": 0.3455, "step": 4 }, { "epoch": 0.00725689404934688, "grad_norm": 0.11544519662857056, "learning_rate": 5.065e-05, "loss": 0.1971, "step": 5 }, { "epoch": 0.008708272859216255, "grad_norm": 0.13015075027942657, "learning_rate": 6.078e-05, "loss": 0.2833, "step": 6 }, { "epoch": 0.010159651669085631, "grad_norm": 0.14773070812225342, "learning_rate": 7.091e-05, "loss": 0.4397, "step": 7 }, { "epoch": 0.011611030478955007, "grad_norm": 0.15062645077705383, "learning_rate": 8.104e-05, "loss": 0.4152, "step": 8 }, { "epoch": 0.013062409288824383, "grad_norm": 0.12542325258255005, "learning_rate": 9.117e-05, "loss": 0.3613, "step": 9 }, { "epoch": 0.01451378809869376, "grad_norm": 0.13777385652065277, "learning_rate": 0.0001013, "loss": 0.7267, "step": 10 }, { "epoch": 0.015965166908563134, "grad_norm": 0.16426308453083038, "learning_rate": 0.00010076684210526316, "loss": 0.5661, "step": 11 }, { "epoch": 0.01741654571843251, "grad_norm": 0.12953603267669678, "learning_rate": 0.0001002336842105263, "loss": 0.3289, "step": 12 }, { "epoch": 0.018867924528301886, "grad_norm": 0.1950962245464325, "learning_rate": 9.970052631578946e-05, "loss": 0.6131, "step": 13 }, { "epoch": 0.020319303338171262, "grad_norm": 0.17205172777175903, "learning_rate": 9.916736842105263e-05, "loss": 0.4953, "step": 14 }, { "epoch": 0.02177068214804064, "grad_norm": 0.13275088369846344, "learning_rate": 9.863421052631579e-05, "loss": 0.2192, "step": 15 }, { "epoch": 0.023222060957910014, "grad_norm": 0.13935284316539764, "learning_rate": 9.810105263157895e-05, "loss": 0.2338, "step": 16 }, { "epoch": 0.02467343976777939, "grad_norm": 0.1427544802427292, "learning_rate": 9.756789473684211e-05, "loss": 0.3142, "step": 17 }, { "epoch": 0.026124818577648767, "grad_norm": 0.13596214354038239, "learning_rate": 9.703473684210525e-05, "loss": 0.255, "step": 18 }, { "epoch": 0.027576197387518143, "grad_norm": 0.16519062221050262, "learning_rate": 9.650157894736842e-05, "loss": 0.5583, "step": 19 }, { "epoch": 0.02902757619738752, "grad_norm": 0.11557265371084213, "learning_rate": 9.596842105263158e-05, "loss": 0.1951, "step": 20 }, { "epoch": 0.030478955007256895, "grad_norm": 0.09901455044746399, "learning_rate": 9.543526315789474e-05, "loss": 0.1046, "step": 21 }, { "epoch": 0.03193033381712627, "grad_norm": 0.15276901423931122, "learning_rate": 9.49021052631579e-05, "loss": 0.445, "step": 22 }, { "epoch": 0.033381712626995644, "grad_norm": 0.15448720753192902, "learning_rate": 9.436894736842105e-05, "loss": 0.4333, "step": 23 }, { "epoch": 0.03483309143686502, "grad_norm": 0.18187403678894043, "learning_rate": 9.38357894736842e-05, "loss": 0.3444, "step": 24 }, { "epoch": 0.036284470246734396, "grad_norm": 0.13476215302944183, "learning_rate": 9.330263157894737e-05, "loss": 0.3449, "step": 25 }, { "epoch": 0.03773584905660377, "grad_norm": 0.15487666428089142, "learning_rate": 9.276947368421051e-05, "loss": 0.4944, "step": 26 }, { "epoch": 0.03918722786647315, "grad_norm": 0.17659035325050354, "learning_rate": 9.223631578947369e-05, "loss": 0.5834, "step": 27 }, { "epoch": 0.040638606676342524, "grad_norm": 0.1136946901679039, "learning_rate": 9.170315789473684e-05, "loss": 0.2565, "step": 28 }, { "epoch": 0.0420899854862119, "grad_norm": 0.17775152623653412, "learning_rate": 9.117e-05, "loss": 0.4925, "step": 29 }, { "epoch": 0.04354136429608128, "grad_norm": 0.2578783333301544, "learning_rate": 9.063684210526316e-05, "loss": 0.3918, "step": 30 }, { "epoch": 0.04499274310595065, "grad_norm": 0.1302911639213562, "learning_rate": 9.010368421052632e-05, "loss": 0.3904, "step": 31 }, { "epoch": 0.04644412191582003, "grad_norm": 0.12905248999595642, "learning_rate": 8.957052631578946e-05, "loss": 0.28, "step": 32 }, { "epoch": 0.047895500725689405, "grad_norm": 0.16814574599266052, "learning_rate": 8.903736842105263e-05, "loss": 0.4776, "step": 33 }, { "epoch": 0.04934687953555878, "grad_norm": 0.1260037124156952, "learning_rate": 8.850421052631579e-05, "loss": 0.2609, "step": 34 }, { "epoch": 0.05079825834542816, "grad_norm": 0.10647979378700256, "learning_rate": 8.797105263157895e-05, "loss": 0.2253, "step": 35 }, { "epoch": 0.05224963715529753, "grad_norm": 0.10167553275823593, "learning_rate": 8.743789473684211e-05, "loss": 0.2064, "step": 36 }, { "epoch": 0.05370101596516691, "grad_norm": 0.13301363587379456, "learning_rate": 8.690473684210526e-05, "loss": 0.2473, "step": 37 }, { "epoch": 0.055152394775036286, "grad_norm": 0.11595452576875687, "learning_rate": 8.637157894736842e-05, "loss": 0.2187, "step": 38 }, { "epoch": 0.05660377358490566, "grad_norm": 0.15230633318424225, "learning_rate": 8.583842105263158e-05, "loss": 0.3451, "step": 39 }, { "epoch": 0.05805515239477504, "grad_norm": 0.15629370510578156, "learning_rate": 8.530526315789472e-05, "loss": 0.6737, "step": 40 }, { "epoch": 0.059506531204644414, "grad_norm": 0.14271894097328186, "learning_rate": 8.47721052631579e-05, "loss": 0.216, "step": 41 }, { "epoch": 0.06095791001451379, "grad_norm": 0.23410384356975555, "learning_rate": 8.423894736842105e-05, "loss": 0.3089, "step": 42 }, { "epoch": 0.062409288824383166, "grad_norm": 0.13687372207641602, "learning_rate": 8.37057894736842e-05, "loss": 0.212, "step": 43 }, { "epoch": 0.06386066763425254, "grad_norm": 0.1781831830739975, "learning_rate": 8.317263157894737e-05, "loss": 0.6556, "step": 44 }, { "epoch": 0.06531204644412192, "grad_norm": 0.25682398676872253, "learning_rate": 8.263947368421053e-05, "loss": 0.5198, "step": 45 }, { "epoch": 0.06676342525399129, "grad_norm": 0.2759973406791687, "learning_rate": 8.210631578947368e-05, "loss": 0.9177, "step": 46 }, { "epoch": 0.06821480406386067, "grad_norm": 0.2668778598308563, "learning_rate": 8.157315789473684e-05, "loss": 0.8107, "step": 47 }, { "epoch": 0.06966618287373004, "grad_norm": 0.2767494022846222, "learning_rate": 8.104e-05, "loss": 0.606, "step": 48 }, { "epoch": 0.07111756168359942, "grad_norm": 0.5263920426368713, "learning_rate": 8.050684210526316e-05, "loss": 0.9517, "step": 49 }, { "epoch": 0.07256894049346879, "grad_norm": 0.4693450927734375, "learning_rate": 7.997368421052632e-05, "loss": 1.1133, "step": 50 }, { "epoch": 0.07256894049346879, "eval_loss": 0.2412186712026596, "eval_runtime": 145.5936, "eval_samples_per_second": 1.999, "eval_steps_per_second": 0.501, "step": 50 }, { "epoch": 0.07402031930333818, "grad_norm": 0.1490558385848999, "learning_rate": 7.944052631578947e-05, "loss": 0.5739, "step": 51 }, { "epoch": 0.07547169811320754, "grad_norm": 0.19806860387325287, "learning_rate": 7.890736842105263e-05, "loss": 0.3819, "step": 52 }, { "epoch": 0.07692307692307693, "grad_norm": 0.12554095685482025, "learning_rate": 7.837421052631579e-05, "loss": 0.3486, "step": 53 }, { "epoch": 0.0783744557329463, "grad_norm": 0.17904381453990936, "learning_rate": 7.784105263157893e-05, "loss": 0.6337, "step": 54 }, { "epoch": 0.07982583454281568, "grad_norm": 0.16063204407691956, "learning_rate": 7.730789473684211e-05, "loss": 0.4335, "step": 55 }, { "epoch": 0.08127721335268505, "grad_norm": 0.17032606899738312, "learning_rate": 7.677473684210526e-05, "loss": 0.5176, "step": 56 }, { "epoch": 0.08272859216255443, "grad_norm": 0.36131739616394043, "learning_rate": 7.624157894736842e-05, "loss": 0.3518, "step": 57 }, { "epoch": 0.0841799709724238, "grad_norm": 0.19793149828910828, "learning_rate": 7.570842105263158e-05, "loss": 0.8414, "step": 58 }, { "epoch": 0.08563134978229318, "grad_norm": 0.13127806782722473, "learning_rate": 7.517526315789474e-05, "loss": 0.3519, "step": 59 }, { "epoch": 0.08708272859216255, "grad_norm": 0.1507265418767929, "learning_rate": 7.464210526315789e-05, "loss": 0.8202, "step": 60 }, { "epoch": 0.08853410740203194, "grad_norm": 0.1529066562652588, "learning_rate": 7.410894736842106e-05, "loss": 0.5119, "step": 61 }, { "epoch": 0.0899854862119013, "grad_norm": 0.12405847012996674, "learning_rate": 7.35757894736842e-05, "loss": 0.5436, "step": 62 }, { "epoch": 0.09143686502177069, "grad_norm": 0.16444605588912964, "learning_rate": 7.304263157894737e-05, "loss": 0.4945, "step": 63 }, { "epoch": 0.09288824383164006, "grad_norm": 0.10274633765220642, "learning_rate": 7.250947368421053e-05, "loss": 0.2285, "step": 64 }, { "epoch": 0.09433962264150944, "grad_norm": 0.13879486918449402, "learning_rate": 7.197631578947368e-05, "loss": 0.3028, "step": 65 }, { "epoch": 0.09579100145137881, "grad_norm": 0.1328522115945816, "learning_rate": 7.144315789473684e-05, "loss": 0.4795, "step": 66 }, { "epoch": 0.09724238026124818, "grad_norm": 0.15546078979969025, "learning_rate": 7.091e-05, "loss": 0.5576, "step": 67 }, { "epoch": 0.09869375907111756, "grad_norm": 0.16497625410556793, "learning_rate": 7.037684210526316e-05, "loss": 0.271, "step": 68 }, { "epoch": 0.10014513788098693, "grad_norm": 0.1409892439842224, "learning_rate": 6.984368421052632e-05, "loss": 0.4059, "step": 69 }, { "epoch": 0.10159651669085631, "grad_norm": 0.173180490732193, "learning_rate": 6.931052631578947e-05, "loss": 0.5719, "step": 70 }, { "epoch": 0.10304789550072568, "grad_norm": 0.1634824275970459, "learning_rate": 6.877736842105263e-05, "loss": 0.3316, "step": 71 }, { "epoch": 0.10449927431059507, "grad_norm": 0.1520518958568573, "learning_rate": 6.824421052631579e-05, "loss": 0.749, "step": 72 }, { "epoch": 0.10595065312046444, "grad_norm": 0.11503203213214874, "learning_rate": 6.771105263157895e-05, "loss": 0.3243, "step": 73 }, { "epoch": 0.10740203193033382, "grad_norm": 0.09616807848215103, "learning_rate": 6.71778947368421e-05, "loss": 0.2127, "step": 74 }, { "epoch": 0.10885341074020319, "grad_norm": 0.12218757718801498, "learning_rate": 6.664473684210527e-05, "loss": 0.3151, "step": 75 }, { "epoch": 0.11030478955007257, "grad_norm": 0.11861466616392136, "learning_rate": 6.611157894736842e-05, "loss": 0.2481, "step": 76 }, { "epoch": 0.11175616835994194, "grad_norm": 0.08120733499526978, "learning_rate": 6.557842105263158e-05, "loss": 0.1281, "step": 77 }, { "epoch": 0.11320754716981132, "grad_norm": 0.11339303106069565, "learning_rate": 6.504526315789474e-05, "loss": 0.3048, "step": 78 }, { "epoch": 0.11465892597968069, "grad_norm": 0.12353406846523285, "learning_rate": 6.451210526315789e-05, "loss": 0.522, "step": 79 }, { "epoch": 0.11611030478955008, "grad_norm": 0.18549102544784546, "learning_rate": 6.397894736842105e-05, "loss": 1.1374, "step": 80 }, { "epoch": 0.11756168359941944, "grad_norm": 0.10565419495105743, "learning_rate": 6.344578947368421e-05, "loss": 0.2235, "step": 81 }, { "epoch": 0.11901306240928883, "grad_norm": 0.13751162588596344, "learning_rate": 6.291263157894737e-05, "loss": 0.4815, "step": 82 }, { "epoch": 0.1204644412191582, "grad_norm": 0.12583044171333313, "learning_rate": 6.237947368421053e-05, "loss": 0.4072, "step": 83 }, { "epoch": 0.12191582002902758, "grad_norm": 0.11933013796806335, "learning_rate": 6.184631578947368e-05, "loss": 0.4836, "step": 84 }, { "epoch": 0.12336719883889695, "grad_norm": 0.17227371037006378, "learning_rate": 6.131315789473684e-05, "loss": 0.6044, "step": 85 }, { "epoch": 0.12481857764876633, "grad_norm": 0.1414662003517151, "learning_rate": 6.078e-05, "loss": 0.5979, "step": 86 }, { "epoch": 0.1262699564586357, "grad_norm": 0.08421526849269867, "learning_rate": 6.024684210526315e-05, "loss": 0.1035, "step": 87 }, { "epoch": 0.12772133526850507, "grad_norm": 0.12226138263940811, "learning_rate": 5.9713684210526305e-05, "loss": 0.1514, "step": 88 }, { "epoch": 0.12917271407837447, "grad_norm": 0.08483120799064636, "learning_rate": 5.918052631578947e-05, "loss": 0.1681, "step": 89 }, { "epoch": 0.13062409288824384, "grad_norm": 0.16209205985069275, "learning_rate": 5.8647368421052634e-05, "loss": 1.0777, "step": 90 }, { "epoch": 0.1320754716981132, "grad_norm": 0.16335219144821167, "learning_rate": 5.811421052631579e-05, "loss": 0.8582, "step": 91 }, { "epoch": 0.13352685050798258, "grad_norm": 0.14614659547805786, "learning_rate": 5.758105263157894e-05, "loss": 0.8367, "step": 92 }, { "epoch": 0.13497822931785197, "grad_norm": 0.12466667592525482, "learning_rate": 5.70478947368421e-05, "loss": 0.2996, "step": 93 }, { "epoch": 0.13642960812772134, "grad_norm": 0.16494490206241608, "learning_rate": 5.6514736842105256e-05, "loss": 0.7263, "step": 94 }, { "epoch": 0.1378809869375907, "grad_norm": 0.1597548872232437, "learning_rate": 5.5981578947368424e-05, "loss": 0.5913, "step": 95 }, { "epoch": 0.13933236574746008, "grad_norm": 0.246367946267128, "learning_rate": 5.544842105263158e-05, "loss": 1.299, "step": 96 }, { "epoch": 0.14078374455732948, "grad_norm": 0.2906947731971741, "learning_rate": 5.491526315789474e-05, "loss": 1.0694, "step": 97 }, { "epoch": 0.14223512336719885, "grad_norm": 0.32492387294769287, "learning_rate": 5.438210526315789e-05, "loss": 0.6249, "step": 98 }, { "epoch": 0.14368650217706821, "grad_norm": 0.3696464002132416, "learning_rate": 5.384894736842105e-05, "loss": 1.3239, "step": 99 }, { "epoch": 0.14513788098693758, "grad_norm": 0.4203333854675293, "learning_rate": 5.331578947368421e-05, "loss": 0.976, "step": 100 }, { "epoch": 0.14513788098693758, "eval_loss": 0.2287154644727707, "eval_runtime": 144.369, "eval_samples_per_second": 2.016, "eval_steps_per_second": 0.506, "step": 100 }, { "epoch": 0.14658925979680695, "grad_norm": 0.14465273916721344, "learning_rate": 5.278263157894736e-05, "loss": 0.7933, "step": 101 }, { "epoch": 0.14804063860667635, "grad_norm": 0.1529458463191986, "learning_rate": 5.224947368421053e-05, "loss": 0.4874, "step": 102 }, { "epoch": 0.14949201741654572, "grad_norm": 0.09371291100978851, "learning_rate": 5.171631578947368e-05, "loss": 0.1754, "step": 103 }, { "epoch": 0.1509433962264151, "grad_norm": 0.11294244974851608, "learning_rate": 5.1183157894736844e-05, "loss": 0.4367, "step": 104 }, { "epoch": 0.15239477503628446, "grad_norm": 0.11110277473926544, "learning_rate": 5.065e-05, "loss": 0.3404, "step": 105 }, { "epoch": 0.15384615384615385, "grad_norm": 0.09716267883777618, "learning_rate": 5.011684210526315e-05, "loss": 0.3296, "step": 106 }, { "epoch": 0.15529753265602322, "grad_norm": 0.15396001935005188, "learning_rate": 4.958368421052631e-05, "loss": 0.772, "step": 107 }, { "epoch": 0.1567489114658926, "grad_norm": 0.17330443859100342, "learning_rate": 4.9050526315789473e-05, "loss": 0.4806, "step": 108 }, { "epoch": 0.15820029027576196, "grad_norm": 0.15418781340122223, "learning_rate": 4.851736842105263e-05, "loss": 0.6729, "step": 109 }, { "epoch": 0.15965166908563136, "grad_norm": 0.13719098269939423, "learning_rate": 4.798421052631579e-05, "loss": 0.2485, "step": 110 }, { "epoch": 0.16110304789550073, "grad_norm": 0.1265023797750473, "learning_rate": 4.745105263157895e-05, "loss": 0.3845, "step": 111 }, { "epoch": 0.1625544267053701, "grad_norm": 0.15158045291900635, "learning_rate": 4.69178947368421e-05, "loss": 0.4527, "step": 112 }, { "epoch": 0.16400580551523947, "grad_norm": 0.12712687253952026, "learning_rate": 4.638473684210526e-05, "loss": 0.3207, "step": 113 }, { "epoch": 0.16545718432510886, "grad_norm": 0.10735659301280975, "learning_rate": 4.585157894736842e-05, "loss": 0.2524, "step": 114 }, { "epoch": 0.16690856313497823, "grad_norm": 0.14663982391357422, "learning_rate": 4.531842105263158e-05, "loss": 0.7199, "step": 115 }, { "epoch": 0.1683599419448476, "grad_norm": 0.12890632450580597, "learning_rate": 4.478526315789473e-05, "loss": 0.5994, "step": 116 }, { "epoch": 0.16981132075471697, "grad_norm": 0.16796326637268066, "learning_rate": 4.425210526315789e-05, "loss": 0.6609, "step": 117 }, { "epoch": 0.17126269956458637, "grad_norm": 0.21263499557971954, "learning_rate": 4.3718947368421054e-05, "loss": 0.6373, "step": 118 }, { "epoch": 0.17271407837445574, "grad_norm": 0.15164527297019958, "learning_rate": 4.318578947368421e-05, "loss": 0.3913, "step": 119 }, { "epoch": 0.1741654571843251, "grad_norm": 0.09173654019832611, "learning_rate": 4.265263157894736e-05, "loss": 0.22, "step": 120 }, { "epoch": 0.17561683599419448, "grad_norm": 0.10727760940790176, "learning_rate": 4.211947368421052e-05, "loss": 0.2657, "step": 121 }, { "epoch": 0.17706821480406387, "grad_norm": 0.1093788743019104, "learning_rate": 4.1586315789473684e-05, "loss": 0.1771, "step": 122 }, { "epoch": 0.17851959361393324, "grad_norm": 0.11123545467853546, "learning_rate": 4.105315789473684e-05, "loss": 0.3519, "step": 123 }, { "epoch": 0.1799709724238026, "grad_norm": 0.11694060266017914, "learning_rate": 4.052e-05, "loss": 0.2824, "step": 124 }, { "epoch": 0.18142235123367198, "grad_norm": 0.1088772639632225, "learning_rate": 3.998684210526316e-05, "loss": 0.3736, "step": 125 }, { "epoch": 0.18287373004354138, "grad_norm": 0.07843092083930969, "learning_rate": 3.945368421052631e-05, "loss": 0.1476, "step": 126 }, { "epoch": 0.18432510885341075, "grad_norm": 0.12299855053424835, "learning_rate": 3.892052631578947e-05, "loss": 0.3185, "step": 127 }, { "epoch": 0.18577648766328012, "grad_norm": 0.13632050156593323, "learning_rate": 3.838736842105263e-05, "loss": 0.3271, "step": 128 }, { "epoch": 0.18722786647314948, "grad_norm": 0.11568715423345566, "learning_rate": 3.785421052631579e-05, "loss": 0.2214, "step": 129 }, { "epoch": 0.18867924528301888, "grad_norm": 0.11555133759975433, "learning_rate": 3.732105263157894e-05, "loss": 0.2503, "step": 130 }, { "epoch": 0.19013062409288825, "grad_norm": 0.11684191972017288, "learning_rate": 3.67878947368421e-05, "loss": 0.1773, "step": 131 }, { "epoch": 0.19158200290275762, "grad_norm": 0.09825399518013, "learning_rate": 3.6254736842105264e-05, "loss": 0.2053, "step": 132 }, { "epoch": 0.193033381712627, "grad_norm": 0.13328059017658234, "learning_rate": 3.572157894736842e-05, "loss": 0.1957, "step": 133 }, { "epoch": 0.19448476052249636, "grad_norm": 0.1092911884188652, "learning_rate": 3.518842105263158e-05, "loss": 0.2233, "step": 134 }, { "epoch": 0.19593613933236576, "grad_norm": 0.10300347954034805, "learning_rate": 3.465526315789473e-05, "loss": 0.3399, "step": 135 }, { "epoch": 0.19738751814223512, "grad_norm": 0.1662655472755432, "learning_rate": 3.4122105263157894e-05, "loss": 0.2793, "step": 136 }, { "epoch": 0.1988388969521045, "grad_norm": 0.08663914352655411, "learning_rate": 3.358894736842105e-05, "loss": 0.1391, "step": 137 }, { "epoch": 0.20029027576197386, "grad_norm": 0.15232573449611664, "learning_rate": 3.305578947368421e-05, "loss": 0.3642, "step": 138 }, { "epoch": 0.20174165457184326, "grad_norm": 0.10668061673641205, "learning_rate": 3.252263157894737e-05, "loss": 0.1986, "step": 139 }, { "epoch": 0.20319303338171263, "grad_norm": 0.09178131073713303, "learning_rate": 3.198947368421052e-05, "loss": 0.2896, "step": 140 }, { "epoch": 0.204644412191582, "grad_norm": 0.1355268359184265, "learning_rate": 3.1456315789473684e-05, "loss": 0.8356, "step": 141 }, { "epoch": 0.20609579100145137, "grad_norm": 0.13758248090744019, "learning_rate": 3.092315789473684e-05, "loss": 0.6022, "step": 142 }, { "epoch": 0.20754716981132076, "grad_norm": 0.12314596027135849, "learning_rate": 3.039e-05, "loss": 0.4143, "step": 143 }, { "epoch": 0.20899854862119013, "grad_norm": 0.11046963185071945, "learning_rate": 2.9856842105263153e-05, "loss": 0.2393, "step": 144 }, { "epoch": 0.2104499274310595, "grad_norm": 0.16623206436634064, "learning_rate": 2.9323684210526317e-05, "loss": 0.3518, "step": 145 }, { "epoch": 0.21190130624092887, "grad_norm": 0.19511793553829193, "learning_rate": 2.879052631578947e-05, "loss": 0.5, "step": 146 }, { "epoch": 0.21335268505079827, "grad_norm": 0.2845495939254761, "learning_rate": 2.8257368421052628e-05, "loss": 0.7584, "step": 147 }, { "epoch": 0.21480406386066764, "grad_norm": 0.27343127131462097, "learning_rate": 2.772421052631579e-05, "loss": 0.87, "step": 148 }, { "epoch": 0.216255442670537, "grad_norm": 0.41281190514564514, "learning_rate": 2.7191052631578946e-05, "loss": 0.8707, "step": 149 }, { "epoch": 0.21770682148040638, "grad_norm": 0.3892386257648468, "learning_rate": 2.6657894736842104e-05, "loss": 0.5039, "step": 150 }, { "epoch": 0.21770682148040638, "eval_loss": 0.22993537783622742, "eval_runtime": 142.9262, "eval_samples_per_second": 2.036, "eval_steps_per_second": 0.511, "step": 150 }, { "epoch": 0.21915820029027577, "grad_norm": 0.15316051244735718, "learning_rate": 2.6124736842105265e-05, "loss": 1.0054, "step": 151 }, { "epoch": 0.22060957910014514, "grad_norm": 0.18049275875091553, "learning_rate": 2.5591578947368422e-05, "loss": 0.8339, "step": 152 }, { "epoch": 0.2220609579100145, "grad_norm": 0.1573140174150467, "learning_rate": 2.5058421052631576e-05, "loss": 0.5323, "step": 153 }, { "epoch": 0.22351233671988388, "grad_norm": 0.1617121547460556, "learning_rate": 2.4525263157894737e-05, "loss": 1.0705, "step": 154 }, { "epoch": 0.22496371552975328, "grad_norm": 0.12206801027059555, "learning_rate": 2.3992105263157894e-05, "loss": 0.5119, "step": 155 }, { "epoch": 0.22641509433962265, "grad_norm": 0.9858077764511108, "learning_rate": 2.345894736842105e-05, "loss": 1.0604, "step": 156 }, { "epoch": 0.22786647314949202, "grad_norm": 0.15416570007801056, "learning_rate": 2.292578947368421e-05, "loss": 0.2413, "step": 157 }, { "epoch": 0.22931785195936139, "grad_norm": 0.18875420093536377, "learning_rate": 2.2392631578947366e-05, "loss": 0.7481, "step": 158 }, { "epoch": 0.23076923076923078, "grad_norm": 0.12112567573785782, "learning_rate": 2.1859473684210527e-05, "loss": 0.32, "step": 159 }, { "epoch": 0.23222060957910015, "grad_norm": 0.14630571007728577, "learning_rate": 2.132631578947368e-05, "loss": 0.3848, "step": 160 }, { "epoch": 0.23367198838896952, "grad_norm": 0.10878642648458481, "learning_rate": 2.0793157894736842e-05, "loss": 0.4093, "step": 161 }, { "epoch": 0.2351233671988389, "grad_norm": 0.14565850794315338, "learning_rate": 2.026e-05, "loss": 0.4859, "step": 162 }, { "epoch": 0.2365747460087083, "grad_norm": 0.14800970256328583, "learning_rate": 1.9726842105263157e-05, "loss": 0.3829, "step": 163 }, { "epoch": 0.23802612481857766, "grad_norm": 0.14318357408046722, "learning_rate": 1.9193684210526314e-05, "loss": 0.8232, "step": 164 }, { "epoch": 0.23947750362844702, "grad_norm": 0.11647699773311615, "learning_rate": 1.866052631578947e-05, "loss": 0.3973, "step": 165 }, { "epoch": 0.2409288824383164, "grad_norm": 0.17588454484939575, "learning_rate": 1.8127368421052632e-05, "loss": 1.0372, "step": 166 }, { "epoch": 0.24238026124818576, "grad_norm": 0.22197209298610687, "learning_rate": 1.759421052631579e-05, "loss": 0.6101, "step": 167 }, { "epoch": 0.24383164005805516, "grad_norm": 0.11662589013576508, "learning_rate": 1.7061052631578947e-05, "loss": 0.3538, "step": 168 }, { "epoch": 0.24528301886792453, "grad_norm": 0.12414685636758804, "learning_rate": 1.6527894736842104e-05, "loss": 0.5084, "step": 169 }, { "epoch": 0.2467343976777939, "grad_norm": 0.10851942747831345, "learning_rate": 1.599473684210526e-05, "loss": 0.3311, "step": 170 }, { "epoch": 0.24818577648766327, "grad_norm": 0.1293657124042511, "learning_rate": 1.546157894736842e-05, "loss": 0.3142, "step": 171 }, { "epoch": 0.24963715529753266, "grad_norm": 0.19298462569713593, "learning_rate": 1.4928421052631576e-05, "loss": 0.4494, "step": 172 }, { "epoch": 0.251088534107402, "grad_norm": 0.12975549697875977, "learning_rate": 1.4395263157894735e-05, "loss": 0.2316, "step": 173 }, { "epoch": 0.2525399129172714, "grad_norm": 0.09491987526416779, "learning_rate": 1.3862105263157895e-05, "loss": 0.3277, "step": 174 }, { "epoch": 0.2539912917271408, "grad_norm": 0.21053697168827057, "learning_rate": 1.3328947368421052e-05, "loss": 0.3236, "step": 175 }, { "epoch": 0.25544267053701014, "grad_norm": 0.12965311110019684, "learning_rate": 1.2795789473684211e-05, "loss": 0.9922, "step": 176 }, { "epoch": 0.25689404934687954, "grad_norm": 0.10223641246557236, "learning_rate": 1.2262631578947368e-05, "loss": 0.3105, "step": 177 }, { "epoch": 0.25834542815674894, "grad_norm": 0.15014851093292236, "learning_rate": 1.1729473684210526e-05, "loss": 0.5113, "step": 178 }, { "epoch": 0.2597968069666183, "grad_norm": 0.1316860169172287, "learning_rate": 1.1196315789473683e-05, "loss": 0.5018, "step": 179 }, { "epoch": 0.2612481857764877, "grad_norm": 0.11262688785791397, "learning_rate": 1.066315789473684e-05, "loss": 0.2846, "step": 180 }, { "epoch": 0.262699564586357, "grad_norm": 0.14847137033939362, "learning_rate": 1.013e-05, "loss": 0.3721, "step": 181 }, { "epoch": 0.2641509433962264, "grad_norm": 0.11905442178249359, "learning_rate": 9.596842105263157e-06, "loss": 0.5352, "step": 182 }, { "epoch": 0.2656023222060958, "grad_norm": 0.12718041241168976, "learning_rate": 9.063684210526316e-06, "loss": 0.298, "step": 183 }, { "epoch": 0.26705370101596515, "grad_norm": 0.09555162489414215, "learning_rate": 8.530526315789473e-06, "loss": 0.2167, "step": 184 }, { "epoch": 0.26850507982583455, "grad_norm": 0.09585528820753098, "learning_rate": 7.99736842105263e-06, "loss": 0.6557, "step": 185 }, { "epoch": 0.26995645863570394, "grad_norm": 0.09632135927677155, "learning_rate": 7.464210526315788e-06, "loss": 0.2777, "step": 186 }, { "epoch": 0.2714078374455733, "grad_norm": 0.10153202712535858, "learning_rate": 6.931052631578947e-06, "loss": 0.237, "step": 187 }, { "epoch": 0.2728592162554427, "grad_norm": 0.1199401244521141, "learning_rate": 6.3978947368421055e-06, "loss": 0.2484, "step": 188 }, { "epoch": 0.274310595065312, "grad_norm": 0.15911225974559784, "learning_rate": 5.864736842105263e-06, "loss": 0.2827, "step": 189 }, { "epoch": 0.2757619738751814, "grad_norm": 0.12618254125118256, "learning_rate": 5.33157894736842e-06, "loss": 0.2016, "step": 190 }, { "epoch": 0.2772133526850508, "grad_norm": 0.1026952788233757, "learning_rate": 4.7984210526315785e-06, "loss": 0.282, "step": 191 }, { "epoch": 0.27866473149492016, "grad_norm": 0.10733366012573242, "learning_rate": 4.265263157894737e-06, "loss": 0.1684, "step": 192 }, { "epoch": 0.28011611030478956, "grad_norm": 0.12453214824199677, "learning_rate": 3.732105263157894e-06, "loss": 0.4221, "step": 193 }, { "epoch": 0.28156748911465895, "grad_norm": 0.19456051290035248, "learning_rate": 3.1989473684210527e-06, "loss": 0.4257, "step": 194 }, { "epoch": 0.2830188679245283, "grad_norm": 0.18270336091518402, "learning_rate": 2.66578947368421e-06, "loss": 0.8292, "step": 195 }, { "epoch": 0.2844702467343977, "grad_norm": 0.1802217960357666, "learning_rate": 2.1326315789473684e-06, "loss": 0.8952, "step": 196 }, { "epoch": 0.28592162554426703, "grad_norm": 0.23863941431045532, "learning_rate": 1.5994736842105264e-06, "loss": 0.6211, "step": 197 }, { "epoch": 0.28737300435413643, "grad_norm": 0.282530277967453, "learning_rate": 1.0663157894736842e-06, "loss": 0.6815, "step": 198 }, { "epoch": 0.2888243831640058, "grad_norm": 0.46267229318618774, "learning_rate": 5.331578947368421e-07, "loss": 0.6762, "step": 199 }, { "epoch": 0.29027576197387517, "grad_norm": 0.4375733733177185, "learning_rate": 0.0, "loss": 1.0879, "step": 200 }, { "epoch": 0.29027576197387517, "eval_loss": 0.22676756978034973, "eval_runtime": 144.3205, "eval_samples_per_second": 2.016, "eval_steps_per_second": 0.506, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.4053840999350272e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }