{ "best_metric": 3.009519338607788, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.3870967741935484, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025806451612903226, "grad_norm": 2.02890944480896, "learning_rate": 1.007e-05, "loss": 3.2543, "step": 1 }, { "epoch": 0.0025806451612903226, "eval_loss": 3.4940507411956787, "eval_runtime": 39.1123, "eval_samples_per_second": 4.167, "eval_steps_per_second": 1.048, "step": 1 }, { "epoch": 0.005161290322580645, "grad_norm": 2.1640987396240234, "learning_rate": 2.014e-05, "loss": 3.1859, "step": 2 }, { "epoch": 0.007741935483870968, "grad_norm": 2.289116859436035, "learning_rate": 3.0209999999999997e-05, "loss": 3.1102, "step": 3 }, { "epoch": 0.01032258064516129, "grad_norm": 1.9029440879821777, "learning_rate": 4.028e-05, "loss": 3.1912, "step": 4 }, { "epoch": 0.012903225806451613, "grad_norm": 1.5423721075057983, "learning_rate": 5.035e-05, "loss": 3.3264, "step": 5 }, { "epoch": 0.015483870967741935, "grad_norm": 1.1242539882659912, "learning_rate": 6.0419999999999994e-05, "loss": 2.9698, "step": 6 }, { "epoch": 0.01806451612903226, "grad_norm": 1.6809178590774536, "learning_rate": 7.049e-05, "loss": 2.8629, "step": 7 }, { "epoch": 0.02064516129032258, "grad_norm": 1.3089873790740967, "learning_rate": 8.056e-05, "loss": 2.9564, "step": 8 }, { "epoch": 0.023225806451612905, "grad_norm": 0.9860974550247192, "learning_rate": 9.062999999999999e-05, "loss": 3.0032, "step": 9 }, { "epoch": 0.025806451612903226, "grad_norm": 0.9577404856681824, "learning_rate": 0.0001007, "loss": 2.7733, "step": 10 }, { "epoch": 0.02838709677419355, "grad_norm": 1.073362946510315, "learning_rate": 0.00010017, "loss": 3.0293, "step": 11 }, { "epoch": 0.03096774193548387, "grad_norm": 0.9577970504760742, "learning_rate": 9.963999999999999e-05, "loss": 3.0549, "step": 12 }, { "epoch": 0.03354838709677419, "grad_norm": 0.8062331676483154, "learning_rate": 9.910999999999999e-05, "loss": 2.9548, "step": 13 }, { "epoch": 0.03612903225806452, "grad_norm": 0.8570857644081116, "learning_rate": 9.858e-05, "loss": 3.1505, "step": 14 }, { "epoch": 0.03870967741935484, "grad_norm": 0.9095686674118042, "learning_rate": 9.805e-05, "loss": 2.8758, "step": 15 }, { "epoch": 0.04129032258064516, "grad_norm": 0.8106794953346252, "learning_rate": 9.752e-05, "loss": 2.9163, "step": 16 }, { "epoch": 0.04387096774193548, "grad_norm": 0.8135427832603455, "learning_rate": 9.698999999999999e-05, "loss": 3.1537, "step": 17 }, { "epoch": 0.04645161290322581, "grad_norm": 0.773794412612915, "learning_rate": 9.646e-05, "loss": 2.9923, "step": 18 }, { "epoch": 0.04903225806451613, "grad_norm": 0.776435911655426, "learning_rate": 9.593e-05, "loss": 2.994, "step": 19 }, { "epoch": 0.05161290322580645, "grad_norm": 0.702139139175415, "learning_rate": 9.539999999999999e-05, "loss": 2.8807, "step": 20 }, { "epoch": 0.05419354838709677, "grad_norm": 0.6850553750991821, "learning_rate": 9.487e-05, "loss": 3.0033, "step": 21 }, { "epoch": 0.0567741935483871, "grad_norm": 0.6869837045669556, "learning_rate": 9.434e-05, "loss": 2.7906, "step": 22 }, { "epoch": 0.05935483870967742, "grad_norm": 0.7767460942268372, "learning_rate": 9.381e-05, "loss": 2.8578, "step": 23 }, { "epoch": 0.06193548387096774, "grad_norm": 0.747832179069519, "learning_rate": 9.327999999999999e-05, "loss": 2.9695, "step": 24 }, { "epoch": 0.06451612903225806, "grad_norm": 0.7731716632843018, "learning_rate": 9.274999999999999e-05, "loss": 2.9062, "step": 25 }, { "epoch": 0.06709677419354838, "grad_norm": 0.8283132910728455, "learning_rate": 9.222e-05, "loss": 3.1388, "step": 26 }, { "epoch": 0.0696774193548387, "grad_norm": 0.8168233036994934, "learning_rate": 9.169e-05, "loss": 3.0032, "step": 27 }, { "epoch": 0.07225806451612904, "grad_norm": 0.7814300060272217, "learning_rate": 9.116e-05, "loss": 3.0617, "step": 28 }, { "epoch": 0.07483870967741936, "grad_norm": 0.8907764554023743, "learning_rate": 9.062999999999999e-05, "loss": 2.8873, "step": 29 }, { "epoch": 0.07741935483870968, "grad_norm": 0.897400975227356, "learning_rate": 9.01e-05, "loss": 3.0865, "step": 30 }, { "epoch": 0.08, "grad_norm": 0.916833758354187, "learning_rate": 8.957e-05, "loss": 2.9955, "step": 31 }, { "epoch": 0.08258064516129032, "grad_norm": 0.9079581499099731, "learning_rate": 8.903999999999999e-05, "loss": 2.9475, "step": 32 }, { "epoch": 0.08516129032258064, "grad_norm": 1.2848162651062012, "learning_rate": 8.850999999999999e-05, "loss": 3.0854, "step": 33 }, { "epoch": 0.08774193548387096, "grad_norm": 1.0301451683044434, "learning_rate": 8.798e-05, "loss": 3.2001, "step": 34 }, { "epoch": 0.09032258064516129, "grad_norm": 0.9421987533569336, "learning_rate": 8.745e-05, "loss": 2.7962, "step": 35 }, { "epoch": 0.09290322580645162, "grad_norm": 1.2306110858917236, "learning_rate": 8.692e-05, "loss": 3.1929, "step": 36 }, { "epoch": 0.09548387096774194, "grad_norm": 1.1693624258041382, "learning_rate": 8.638999999999999e-05, "loss": 3.1043, "step": 37 }, { "epoch": 0.09806451612903226, "grad_norm": 1.169491171836853, "learning_rate": 8.586e-05, "loss": 2.7704, "step": 38 }, { "epoch": 0.10064516129032258, "grad_norm": 1.1204756498336792, "learning_rate": 8.533e-05, "loss": 3.2268, "step": 39 }, { "epoch": 0.1032258064516129, "grad_norm": 1.1709730625152588, "learning_rate": 8.479999999999999e-05, "loss": 2.9685, "step": 40 }, { "epoch": 0.10580645161290322, "grad_norm": 1.2603025436401367, "learning_rate": 8.427e-05, "loss": 2.9147, "step": 41 }, { "epoch": 0.10838709677419354, "grad_norm": 1.5371952056884766, "learning_rate": 8.374e-05, "loss": 2.9618, "step": 42 }, { "epoch": 0.11096774193548387, "grad_norm": 1.4978915452957153, "learning_rate": 8.321e-05, "loss": 3.0561, "step": 43 }, { "epoch": 0.1135483870967742, "grad_norm": 1.8759700059890747, "learning_rate": 8.268e-05, "loss": 3.5141, "step": 44 }, { "epoch": 0.11612903225806452, "grad_norm": 1.6922487020492554, "learning_rate": 8.214999999999999e-05, "loss": 3.229, "step": 45 }, { "epoch": 0.11870967741935484, "grad_norm": 1.9749841690063477, "learning_rate": 8.162e-05, "loss": 3.2539, "step": 46 }, { "epoch": 0.12129032258064516, "grad_norm": 2.2926204204559326, "learning_rate": 8.108999999999998e-05, "loss": 3.1085, "step": 47 }, { "epoch": 0.12387096774193548, "grad_norm": 4.02115535736084, "learning_rate": 8.056e-05, "loss": 3.1481, "step": 48 }, { "epoch": 0.12645161290322582, "grad_norm": 4.62841272354126, "learning_rate": 8.003e-05, "loss": 3.7727, "step": 49 }, { "epoch": 0.12903225806451613, "grad_norm": 6.652851581573486, "learning_rate": 7.95e-05, "loss": 3.9126, "step": 50 }, { "epoch": 0.12903225806451613, "eval_loss": 3.116821765899658, "eval_runtime": 38.3432, "eval_samples_per_second": 4.251, "eval_steps_per_second": 1.069, "step": 50 }, { "epoch": 0.13161290322580646, "grad_norm": 1.2032880783081055, "learning_rate": 7.897e-05, "loss": 2.8811, "step": 51 }, { "epoch": 0.13419354838709677, "grad_norm": 0.9579372406005859, "learning_rate": 7.843999999999999e-05, "loss": 2.9946, "step": 52 }, { "epoch": 0.1367741935483871, "grad_norm": 0.7830987572669983, "learning_rate": 7.790999999999999e-05, "loss": 2.8823, "step": 53 }, { "epoch": 0.1393548387096774, "grad_norm": 0.6758972406387329, "learning_rate": 7.738e-05, "loss": 2.9346, "step": 54 }, { "epoch": 0.14193548387096774, "grad_norm": 0.6744924187660217, "learning_rate": 7.685e-05, "loss": 2.8026, "step": 55 }, { "epoch": 0.14451612903225808, "grad_norm": 0.6925913691520691, "learning_rate": 7.632e-05, "loss": 2.9121, "step": 56 }, { "epoch": 0.14709677419354839, "grad_norm": 0.6952354311943054, "learning_rate": 7.578999999999999e-05, "loss": 2.9217, "step": 57 }, { "epoch": 0.14967741935483872, "grad_norm": 0.6015385985374451, "learning_rate": 7.526e-05, "loss": 2.911, "step": 58 }, { "epoch": 0.15225806451612903, "grad_norm": 0.6113649606704712, "learning_rate": 7.473e-05, "loss": 2.8453, "step": 59 }, { "epoch": 0.15483870967741936, "grad_norm": 0.6024471521377563, "learning_rate": 7.419999999999999e-05, "loss": 2.7338, "step": 60 }, { "epoch": 0.15741935483870967, "grad_norm": 0.6171099543571472, "learning_rate": 7.367e-05, "loss": 2.7157, "step": 61 }, { "epoch": 0.16, "grad_norm": 0.6734641790390015, "learning_rate": 7.314e-05, "loss": 3.0369, "step": 62 }, { "epoch": 0.1625806451612903, "grad_norm": 0.6411603689193726, "learning_rate": 7.261e-05, "loss": 2.9175, "step": 63 }, { "epoch": 0.16516129032258065, "grad_norm": 0.6401498913764954, "learning_rate": 7.208e-05, "loss": 2.9601, "step": 64 }, { "epoch": 0.16774193548387098, "grad_norm": 0.6685578227043152, "learning_rate": 7.154999999999999e-05, "loss": 2.9055, "step": 65 }, { "epoch": 0.1703225806451613, "grad_norm": 0.6739301681518555, "learning_rate": 7.102e-05, "loss": 2.8973, "step": 66 }, { "epoch": 0.17290322580645162, "grad_norm": 0.6716254353523254, "learning_rate": 7.049e-05, "loss": 2.8507, "step": 67 }, { "epoch": 0.17548387096774193, "grad_norm": 0.6408494710922241, "learning_rate": 6.996e-05, "loss": 2.8232, "step": 68 }, { "epoch": 0.17806451612903226, "grad_norm": 0.6635752320289612, "learning_rate": 6.943e-05, "loss": 3.088, "step": 69 }, { "epoch": 0.18064516129032257, "grad_norm": 0.710978090763092, "learning_rate": 6.89e-05, "loss": 2.8905, "step": 70 }, { "epoch": 0.1832258064516129, "grad_norm": 0.7735083103179932, "learning_rate": 6.837e-05, "loss": 2.9583, "step": 71 }, { "epoch": 0.18580645161290324, "grad_norm": 0.7552114725112915, "learning_rate": 6.784e-05, "loss": 2.8666, "step": 72 }, { "epoch": 0.18838709677419355, "grad_norm": 0.8119356036186218, "learning_rate": 6.730999999999999e-05, "loss": 3.0893, "step": 73 }, { "epoch": 0.19096774193548388, "grad_norm": 0.7227278351783752, "learning_rate": 6.678e-05, "loss": 2.9174, "step": 74 }, { "epoch": 0.1935483870967742, "grad_norm": 0.7297806143760681, "learning_rate": 6.625e-05, "loss": 2.9618, "step": 75 }, { "epoch": 0.19612903225806452, "grad_norm": 0.7950009107589722, "learning_rate": 6.572e-05, "loss": 2.8738, "step": 76 }, { "epoch": 0.19870967741935483, "grad_norm": 0.7869434952735901, "learning_rate": 6.519e-05, "loss": 2.7843, "step": 77 }, { "epoch": 0.20129032258064516, "grad_norm": 0.8707318902015686, "learning_rate": 6.466e-05, "loss": 3.1622, "step": 78 }, { "epoch": 0.20387096774193547, "grad_norm": 0.8520801663398743, "learning_rate": 6.413e-05, "loss": 3.0377, "step": 79 }, { "epoch": 0.2064516129032258, "grad_norm": 0.9687163233757019, "learning_rate": 6.359999999999999e-05, "loss": 3.0816, "step": 80 }, { "epoch": 0.20903225806451614, "grad_norm": 0.8639389872550964, "learning_rate": 6.306999999999999e-05, "loss": 2.8825, "step": 81 }, { "epoch": 0.21161290322580645, "grad_norm": 1.042325735092163, "learning_rate": 6.254000000000001e-05, "loss": 2.9446, "step": 82 }, { "epoch": 0.21419354838709678, "grad_norm": 0.9391971230506897, "learning_rate": 6.201e-05, "loss": 3.0544, "step": 83 }, { "epoch": 0.2167741935483871, "grad_norm": 1.0379990339279175, "learning_rate": 6.148e-05, "loss": 3.1278, "step": 84 }, { "epoch": 0.21935483870967742, "grad_norm": 1.0052063465118408, "learning_rate": 6.095e-05, "loss": 3.0154, "step": 85 }, { "epoch": 0.22193548387096773, "grad_norm": 1.1294814348220825, "learning_rate": 6.0419999999999994e-05, "loss": 2.9711, "step": 86 }, { "epoch": 0.22451612903225807, "grad_norm": 1.1187207698822021, "learning_rate": 5.988999999999999e-05, "loss": 2.7353, "step": 87 }, { "epoch": 0.2270967741935484, "grad_norm": 1.1556931734085083, "learning_rate": 5.9359999999999994e-05, "loss": 3.0027, "step": 88 }, { "epoch": 0.2296774193548387, "grad_norm": 1.4021755456924438, "learning_rate": 5.8830000000000004e-05, "loss": 2.9716, "step": 89 }, { "epoch": 0.23225806451612904, "grad_norm": 1.32869291305542, "learning_rate": 5.83e-05, "loss": 3.2768, "step": 90 }, { "epoch": 0.23483870967741935, "grad_norm": 1.6008881330490112, "learning_rate": 5.777e-05, "loss": 3.2203, "step": 91 }, { "epoch": 0.23741935483870968, "grad_norm": 1.739267349243164, "learning_rate": 5.7239999999999994e-05, "loss": 3.0869, "step": 92 }, { "epoch": 0.24, "grad_norm": 1.6709328889846802, "learning_rate": 5.671e-05, "loss": 3.1174, "step": 93 }, { "epoch": 0.24258064516129033, "grad_norm": 2.5117313861846924, "learning_rate": 5.6179999999999994e-05, "loss": 3.2951, "step": 94 }, { "epoch": 0.24516129032258063, "grad_norm": 2.1630053520202637, "learning_rate": 5.5650000000000004e-05, "loss": 3.3658, "step": 95 }, { "epoch": 0.24774193548387097, "grad_norm": 2.027144193649292, "learning_rate": 5.512e-05, "loss": 3.3735, "step": 96 }, { "epoch": 0.2503225806451613, "grad_norm": 2.5083370208740234, "learning_rate": 5.459e-05, "loss": 2.9347, "step": 97 }, { "epoch": 0.25290322580645164, "grad_norm": 2.995940685272217, "learning_rate": 5.406e-05, "loss": 3.5815, "step": 98 }, { "epoch": 0.25548387096774194, "grad_norm": 3.9194164276123047, "learning_rate": 5.353e-05, "loss": 3.3064, "step": 99 }, { "epoch": 0.25806451612903225, "grad_norm": 7.003715991973877, "learning_rate": 5.2999999999999994e-05, "loss": 3.744, "step": 100 }, { "epoch": 0.25806451612903225, "eval_loss": 3.0592901706695557, "eval_runtime": 38.3187, "eval_samples_per_second": 4.254, "eval_steps_per_second": 1.07, "step": 100 }, { "epoch": 0.26064516129032256, "grad_norm": 0.8486892580986023, "learning_rate": 5.246999999999999e-05, "loss": 2.855, "step": 101 }, { "epoch": 0.2632258064516129, "grad_norm": 0.8181604146957397, "learning_rate": 5.194e-05, "loss": 3.0667, "step": 102 }, { "epoch": 0.2658064516129032, "grad_norm": 0.7249521017074585, "learning_rate": 5.141e-05, "loss": 2.6855, "step": 103 }, { "epoch": 0.26838709677419353, "grad_norm": 0.7008607387542725, "learning_rate": 5.088e-05, "loss": 2.9785, "step": 104 }, { "epoch": 0.2709677419354839, "grad_norm": 0.6476490497589111, "learning_rate": 5.035e-05, "loss": 3.0082, "step": 105 }, { "epoch": 0.2735483870967742, "grad_norm": 0.618168294429779, "learning_rate": 4.9819999999999994e-05, "loss": 2.8595, "step": 106 }, { "epoch": 0.2761290322580645, "grad_norm": 0.6012650728225708, "learning_rate": 4.929e-05, "loss": 2.7147, "step": 107 }, { "epoch": 0.2787096774193548, "grad_norm": 0.6011011600494385, "learning_rate": 4.876e-05, "loss": 2.7046, "step": 108 }, { "epoch": 0.2812903225806452, "grad_norm": 0.9905216693878174, "learning_rate": 4.823e-05, "loss": 2.8049, "step": 109 }, { "epoch": 0.2838709677419355, "grad_norm": 0.5935449600219727, "learning_rate": 4.7699999999999994e-05, "loss": 2.8319, "step": 110 }, { "epoch": 0.2864516129032258, "grad_norm": 0.5935178399085999, "learning_rate": 4.717e-05, "loss": 2.9189, "step": 111 }, { "epoch": 0.28903225806451616, "grad_norm": 0.6392220854759216, "learning_rate": 4.6639999999999994e-05, "loss": 2.6776, "step": 112 }, { "epoch": 0.29161290322580646, "grad_norm": 0.6458805799484253, "learning_rate": 4.611e-05, "loss": 2.8857, "step": 113 }, { "epoch": 0.29419354838709677, "grad_norm": 0.6118318438529968, "learning_rate": 4.558e-05, "loss": 2.7875, "step": 114 }, { "epoch": 0.2967741935483871, "grad_norm": 0.6118870377540588, "learning_rate": 4.505e-05, "loss": 2.7211, "step": 115 }, { "epoch": 0.29935483870967744, "grad_norm": 0.6446405053138733, "learning_rate": 4.4519999999999994e-05, "loss": 2.9338, "step": 116 }, { "epoch": 0.30193548387096775, "grad_norm": 0.641838014125824, "learning_rate": 4.399e-05, "loss": 2.6843, "step": 117 }, { "epoch": 0.30451612903225805, "grad_norm": 0.609104335308075, "learning_rate": 4.346e-05, "loss": 2.848, "step": 118 }, { "epoch": 0.30709677419354836, "grad_norm": 0.6547131538391113, "learning_rate": 4.293e-05, "loss": 2.9458, "step": 119 }, { "epoch": 0.3096774193548387, "grad_norm": 0.7462600469589233, "learning_rate": 4.2399999999999994e-05, "loss": 3.1013, "step": 120 }, { "epoch": 0.31225806451612903, "grad_norm": 0.6465981602668762, "learning_rate": 4.187e-05, "loss": 2.7645, "step": 121 }, { "epoch": 0.31483870967741934, "grad_norm": 0.7226136922836304, "learning_rate": 4.134e-05, "loss": 3.041, "step": 122 }, { "epoch": 0.3174193548387097, "grad_norm": 0.8358485698699951, "learning_rate": 4.081e-05, "loss": 2.996, "step": 123 }, { "epoch": 0.32, "grad_norm": 0.823491632938385, "learning_rate": 4.028e-05, "loss": 3.0795, "step": 124 }, { "epoch": 0.3225806451612903, "grad_norm": 0.7459017634391785, "learning_rate": 3.975e-05, "loss": 2.9079, "step": 125 }, { "epoch": 0.3251612903225806, "grad_norm": 0.7506483197212219, "learning_rate": 3.9219999999999994e-05, "loss": 2.7378, "step": 126 }, { "epoch": 0.327741935483871, "grad_norm": 0.8168128728866577, "learning_rate": 3.869e-05, "loss": 3.1751, "step": 127 }, { "epoch": 0.3303225806451613, "grad_norm": 0.8122110962867737, "learning_rate": 3.816e-05, "loss": 3.0125, "step": 128 }, { "epoch": 0.3329032258064516, "grad_norm": 0.8495927453041077, "learning_rate": 3.763e-05, "loss": 2.903, "step": 129 }, { "epoch": 0.33548387096774196, "grad_norm": 0.9135481119155884, "learning_rate": 3.7099999999999994e-05, "loss": 3.0412, "step": 130 }, { "epoch": 0.33806451612903227, "grad_norm": 0.8771790862083435, "learning_rate": 3.657e-05, "loss": 2.9565, "step": 131 }, { "epoch": 0.3406451612903226, "grad_norm": 0.899596631526947, "learning_rate": 3.604e-05, "loss": 3.1069, "step": 132 }, { "epoch": 0.3432258064516129, "grad_norm": 1.055289626121521, "learning_rate": 3.551e-05, "loss": 2.9057, "step": 133 }, { "epoch": 0.34580645161290324, "grad_norm": 1.0215229988098145, "learning_rate": 3.498e-05, "loss": 3.0173, "step": 134 }, { "epoch": 0.34838709677419355, "grad_norm": 1.0191702842712402, "learning_rate": 3.445e-05, "loss": 3.1466, "step": 135 }, { "epoch": 0.35096774193548386, "grad_norm": 1.1208670139312744, "learning_rate": 3.392e-05, "loss": 3.2075, "step": 136 }, { "epoch": 0.3535483870967742, "grad_norm": 1.1400412321090698, "learning_rate": 3.339e-05, "loss": 3.159, "step": 137 }, { "epoch": 0.3561290322580645, "grad_norm": 1.1518625020980835, "learning_rate": 3.286e-05, "loss": 3.1083, "step": 138 }, { "epoch": 0.35870967741935483, "grad_norm": 1.382748007774353, "learning_rate": 3.233e-05, "loss": 2.9339, "step": 139 }, { "epoch": 0.36129032258064514, "grad_norm": 1.279389500617981, "learning_rate": 3.1799999999999994e-05, "loss": 2.9156, "step": 140 }, { "epoch": 0.3638709677419355, "grad_norm": 1.5063074827194214, "learning_rate": 3.1270000000000004e-05, "loss": 3.4519, "step": 141 }, { "epoch": 0.3664516129032258, "grad_norm": 1.417640209197998, "learning_rate": 3.074e-05, "loss": 3.0448, "step": 142 }, { "epoch": 0.3690322580645161, "grad_norm": 1.8814888000488281, "learning_rate": 3.0209999999999997e-05, "loss": 2.7309, "step": 143 }, { "epoch": 0.3716129032258065, "grad_norm": 2.1574666500091553, "learning_rate": 2.9679999999999997e-05, "loss": 3.2975, "step": 144 }, { "epoch": 0.3741935483870968, "grad_norm": 2.018376350402832, "learning_rate": 2.915e-05, "loss": 2.935, "step": 145 }, { "epoch": 0.3767741935483871, "grad_norm": 2.580366373062134, "learning_rate": 2.8619999999999997e-05, "loss": 3.0637, "step": 146 }, { "epoch": 0.3793548387096774, "grad_norm": 2.8553295135498047, "learning_rate": 2.8089999999999997e-05, "loss": 3.2642, "step": 147 }, { "epoch": 0.38193548387096776, "grad_norm": 4.084458351135254, "learning_rate": 2.756e-05, "loss": 3.5309, "step": 148 }, { "epoch": 0.38451612903225807, "grad_norm": 7.080618858337402, "learning_rate": 2.703e-05, "loss": 3.7543, "step": 149 }, { "epoch": 0.3870967741935484, "grad_norm": 9.050718307495117, "learning_rate": 2.6499999999999997e-05, "loss": 4.0362, "step": 150 }, { "epoch": 0.3870967741935484, "eval_loss": 3.009519338607788, "eval_runtime": 37.8132, "eval_samples_per_second": 4.311, "eval_steps_per_second": 1.084, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2642772657058611e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }