{ "best_metric": 10.355819702148438, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 3.0043478260869567, "eval_steps": 50, "global_step": 172, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017391304347826087, "grad_norm": 0.0354011207818985, "learning_rate": 1e-05, "loss": 10.3777, "step": 1 }, { "epoch": 0.017391304347826087, "eval_loss": 10.378003120422363, "eval_runtime": 0.2363, "eval_samples_per_second": 410.497, "eval_steps_per_second": 105.798, "step": 1 }, { "epoch": 0.034782608695652174, "grad_norm": 0.03678897023200989, "learning_rate": 2e-05, "loss": 10.3799, "step": 2 }, { "epoch": 0.05217391304347826, "grad_norm": 0.031849347054958344, "learning_rate": 3e-05, "loss": 10.3786, "step": 3 }, { "epoch": 0.06956521739130435, "grad_norm": 0.029461175203323364, "learning_rate": 4e-05, "loss": 10.3781, "step": 4 }, { "epoch": 0.08695652173913043, "grad_norm": 0.029609883204102516, "learning_rate": 5e-05, "loss": 10.3783, "step": 5 }, { "epoch": 0.10434782608695652, "grad_norm": 0.02652357518672943, "learning_rate": 6e-05, "loss": 10.3787, "step": 6 }, { "epoch": 0.12173913043478261, "grad_norm": 0.027111053466796875, "learning_rate": 7e-05, "loss": 10.3768, "step": 7 }, { "epoch": 0.1391304347826087, "grad_norm": 0.02587701380252838, "learning_rate": 8e-05, "loss": 10.3786, "step": 8 }, { "epoch": 0.1565217391304348, "grad_norm": 0.02637583389878273, "learning_rate": 9e-05, "loss": 10.3766, "step": 9 }, { "epoch": 0.17391304347826086, "grad_norm": 0.025196833536028862, "learning_rate": 0.0001, "loss": 10.3766, "step": 10 }, { "epoch": 0.19130434782608696, "grad_norm": 0.031075116246938705, "learning_rate": 9.999059852242507e-05, "loss": 10.3763, "step": 11 }, { "epoch": 0.20869565217391303, "grad_norm": 0.028922855854034424, "learning_rate": 9.996239762521151e-05, "loss": 10.3755, "step": 12 }, { "epoch": 0.22608695652173913, "grad_norm": 0.025266852229833603, "learning_rate": 9.991540791356342e-05, "loss": 10.3757, "step": 13 }, { "epoch": 0.24347826086956523, "grad_norm": 0.03289405256509781, "learning_rate": 9.98496470583896e-05, "loss": 10.3759, "step": 14 }, { "epoch": 0.2608695652173913, "grad_norm": 0.04267113655805588, "learning_rate": 9.976513978965829e-05, "loss": 10.3774, "step": 15 }, { "epoch": 0.2782608695652174, "grad_norm": 0.0315500870347023, "learning_rate": 9.966191788709716e-05, "loss": 10.3758, "step": 16 }, { "epoch": 0.2956521739130435, "grad_norm": 0.03112574853003025, "learning_rate": 9.954002016824227e-05, "loss": 10.3772, "step": 17 }, { "epoch": 0.3130434782608696, "grad_norm": 0.038458969444036484, "learning_rate": 9.939949247384046e-05, "loss": 10.3772, "step": 18 }, { "epoch": 0.33043478260869563, "grad_norm": 0.031852226704359055, "learning_rate": 9.924038765061042e-05, "loss": 10.3777, "step": 19 }, { "epoch": 0.34782608695652173, "grad_norm": 0.03086390160024166, "learning_rate": 9.906276553136923e-05, "loss": 10.3766, "step": 20 }, { "epoch": 0.3652173913043478, "grad_norm": 0.029753221198916435, "learning_rate": 9.88666929125318e-05, "loss": 10.3759, "step": 21 }, { "epoch": 0.3826086956521739, "grad_norm": 0.03145391866564751, "learning_rate": 9.865224352899119e-05, "loss": 10.3748, "step": 22 }, { "epoch": 0.4, "grad_norm": 0.0374707393348217, "learning_rate": 9.84194980263903e-05, "loss": 10.3737, "step": 23 }, { "epoch": 0.41739130434782606, "grad_norm": 0.041236892342567444, "learning_rate": 9.816854393079403e-05, "loss": 10.3749, "step": 24 }, { "epoch": 0.43478260869565216, "grad_norm": 0.036372095346450806, "learning_rate": 9.789947561577445e-05, "loss": 10.3728, "step": 25 }, { "epoch": 0.45217391304347826, "grad_norm": 0.0463377870619297, "learning_rate": 9.761239426692077e-05, "loss": 10.3741, "step": 26 }, { "epoch": 0.46956521739130436, "grad_norm": 0.04693188890814781, "learning_rate": 9.730740784378753e-05, "loss": 10.3732, "step": 27 }, { "epoch": 0.48695652173913045, "grad_norm": 0.04398968815803528, "learning_rate": 9.698463103929542e-05, "loss": 10.3748, "step": 28 }, { "epoch": 0.5043478260869565, "grad_norm": 0.06575161963701248, "learning_rate": 9.664418523660004e-05, "loss": 10.3737, "step": 29 }, { "epoch": 0.5217391304347826, "grad_norm": 0.047640036791563034, "learning_rate": 9.628619846344454e-05, "loss": 10.3741, "step": 30 }, { "epoch": 0.5391304347826087, "grad_norm": 0.06228671595454216, "learning_rate": 9.591080534401371e-05, "loss": 10.3732, "step": 31 }, { "epoch": 0.5565217391304348, "grad_norm": 0.057683371007442474, "learning_rate": 9.551814704830734e-05, "loss": 10.3712, "step": 32 }, { "epoch": 0.5739130434782609, "grad_norm": 0.06257960200309753, "learning_rate": 9.51083712390519e-05, "loss": 10.3716, "step": 33 }, { "epoch": 0.591304347826087, "grad_norm": 0.06716064363718033, "learning_rate": 9.468163201617062e-05, "loss": 10.3718, "step": 34 }, { "epoch": 0.6086956521739131, "grad_norm": 0.06136242672801018, "learning_rate": 9.423808985883289e-05, "loss": 10.3707, "step": 35 }, { "epoch": 0.6260869565217392, "grad_norm": 0.06386753171682358, "learning_rate": 9.377791156510455e-05, "loss": 10.3704, "step": 36 }, { "epoch": 0.6434782608695652, "grad_norm": 0.06511002033948898, "learning_rate": 9.330127018922194e-05, "loss": 10.3704, "step": 37 }, { "epoch": 0.6608695652173913, "grad_norm": 0.05235262215137482, "learning_rate": 9.280834497651334e-05, "loss": 10.3693, "step": 38 }, { "epoch": 0.6782608695652174, "grad_norm": 0.06493156403303146, "learning_rate": 9.229932129599205e-05, "loss": 10.3706, "step": 39 }, { "epoch": 0.6956521739130435, "grad_norm": 0.06527063995599747, "learning_rate": 9.177439057064683e-05, "loss": 10.3679, "step": 40 }, { "epoch": 0.7130434782608696, "grad_norm": 0.06748928874731064, "learning_rate": 9.123375020545535e-05, "loss": 10.3698, "step": 41 }, { "epoch": 0.7304347826086957, "grad_norm": 0.0832194909453392, "learning_rate": 9.067760351314838e-05, "loss": 10.3684, "step": 42 }, { "epoch": 0.7478260869565218, "grad_norm": 0.09925281256437302, "learning_rate": 9.01061596377522e-05, "loss": 10.3656, "step": 43 }, { "epoch": 0.7652173913043478, "grad_norm": 0.09170603007078171, "learning_rate": 8.951963347593797e-05, "loss": 10.3647, "step": 44 }, { "epoch": 0.782608695652174, "grad_norm": 0.0749170184135437, "learning_rate": 8.891824559620801e-05, "loss": 10.3668, "step": 45 }, { "epoch": 0.8, "grad_norm": 0.08373304456472397, "learning_rate": 8.83022221559489e-05, "loss": 10.366, "step": 46 }, { "epoch": 0.8173913043478261, "grad_norm": 0.08916713297367096, "learning_rate": 8.767179481638303e-05, "loss": 10.3613, "step": 47 }, { "epoch": 0.8347826086956521, "grad_norm": 0.07597869634628296, "learning_rate": 8.702720065545024e-05, "loss": 10.367, "step": 48 }, { "epoch": 0.8521739130434782, "grad_norm": 0.059838421642780304, "learning_rate": 8.636868207865244e-05, "loss": 10.365, "step": 49 }, { "epoch": 0.8695652173913043, "grad_norm": 0.06458688527345657, "learning_rate": 8.569648672789497e-05, "loss": 10.3623, "step": 50 }, { "epoch": 0.8695652173913043, "eval_loss": 10.36270523071289, "eval_runtime": 0.2351, "eval_samples_per_second": 412.517, "eval_steps_per_second": 106.319, "step": 50 }, { "epoch": 0.8869565217391304, "grad_norm": 0.05858244374394417, "learning_rate": 8.501086738835843e-05, "loss": 10.3653, "step": 51 }, { "epoch": 0.9043478260869565, "grad_norm": 0.06423904001712799, "learning_rate": 8.43120818934367e-05, "loss": 10.3634, "step": 52 }, { "epoch": 0.9217391304347826, "grad_norm": 0.06382982432842255, "learning_rate": 8.360039302777612e-05, "loss": 10.3631, "step": 53 }, { "epoch": 0.9391304347826087, "grad_norm": 0.061683498322963715, "learning_rate": 8.28760684284532e-05, "loss": 10.3639, "step": 54 }, { "epoch": 0.9565217391304348, "grad_norm": 0.06433191895484924, "learning_rate": 8.213938048432697e-05, "loss": 10.3653, "step": 55 }, { "epoch": 0.9739130434782609, "grad_norm": 0.07138363271951675, "learning_rate": 8.139060623360493e-05, "loss": 10.3626, "step": 56 }, { "epoch": 0.991304347826087, "grad_norm": 0.07230795174837112, "learning_rate": 8.063002725966015e-05, "loss": 10.3593, "step": 57 }, { "epoch": 1.0130434782608695, "grad_norm": 0.12195714563131332, "learning_rate": 7.985792958513931e-05, "loss": 18.1899, "step": 58 }, { "epoch": 1.0304347826086957, "grad_norm": 0.07984144985675812, "learning_rate": 7.907460356440133e-05, "loss": 10.3174, "step": 59 }, { "epoch": 1.0478260869565217, "grad_norm": 0.0633162409067154, "learning_rate": 7.828034377432693e-05, "loss": 10.3611, "step": 60 }, { "epoch": 1.065217391304348, "grad_norm": 0.06817752122879028, "learning_rate": 7.74754489035403e-05, "loss": 10.3416, "step": 61 }, { "epoch": 1.0826086956521739, "grad_norm": 0.053118109703063965, "learning_rate": 7.666022164008457e-05, "loss": 10.3772, "step": 62 }, { "epoch": 1.1, "grad_norm": 0.051847703754901886, "learning_rate": 7.583496855759316e-05, "loss": 10.3679, "step": 63 }, { "epoch": 1.117391304347826, "grad_norm": 0.057412922382354736, "learning_rate": 7.500000000000001e-05, "loss": 10.3399, "step": 64 }, { "epoch": 1.134782608695652, "grad_norm": 0.047004085034132004, "learning_rate": 7.415562996483192e-05, "loss": 10.3498, "step": 65 }, { "epoch": 1.1521739130434783, "grad_norm": 0.06264777481555939, "learning_rate": 7.330217598512695e-05, "loss": 10.3728, "step": 66 }, { "epoch": 1.1695652173913043, "grad_norm": 0.0539216622710228, "learning_rate": 7.243995901002312e-05, "loss": 10.3329, "step": 67 }, { "epoch": 1.1869565217391305, "grad_norm": 0.05070541426539421, "learning_rate": 7.156930328406268e-05, "loss": 10.3687, "step": 68 }, { "epoch": 1.2043478260869565, "grad_norm": 0.05166243761777878, "learning_rate": 7.069053622525696e-05, "loss": 10.3839, "step": 69 }, { "epoch": 1.2217391304347827, "grad_norm": 0.03610668703913689, "learning_rate": 6.980398830195785e-05, "loss": 10.3685, "step": 70 }, { "epoch": 1.2391304347826086, "grad_norm": 0.06155957281589508, "learning_rate": 6.890999290858214e-05, "loss": 10.7123, "step": 71 }, { "epoch": 1.2565217391304349, "grad_norm": 0.07164964824914932, "learning_rate": 6.800888624023553e-05, "loss": 10.0488, "step": 72 }, { "epoch": 1.2739130434782608, "grad_norm": 0.06564721465110779, "learning_rate": 6.710100716628344e-05, "loss": 10.3054, "step": 73 }, { "epoch": 1.2913043478260868, "grad_norm": 0.04340159520506859, "learning_rate": 6.618669710291606e-05, "loss": 10.3392, "step": 74 }, { "epoch": 1.308695652173913, "grad_norm": 0.047077324241399765, "learning_rate": 6.526629988475567e-05, "loss": 10.361, "step": 75 }, { "epoch": 1.3260869565217392, "grad_norm": 0.0404900424182415, "learning_rate": 6.434016163555452e-05, "loss": 10.3464, "step": 76 }, { "epoch": 1.3434782608695652, "grad_norm": 0.05057869851589203, "learning_rate": 6.340863063803188e-05, "loss": 10.361, "step": 77 }, { "epoch": 1.3608695652173912, "grad_norm": 0.04050518199801445, "learning_rate": 6.247205720289907e-05, "loss": 10.3573, "step": 78 }, { "epoch": 1.3782608695652174, "grad_norm": 0.0321977436542511, "learning_rate": 6.153079353712201e-05, "loss": 10.3682, "step": 79 }, { "epoch": 1.3956521739130434, "grad_norm": 0.03848752751946449, "learning_rate": 6.058519361147055e-05, "loss": 10.3614, "step": 80 }, { "epoch": 1.4130434782608696, "grad_norm": 0.04897672310471535, "learning_rate": 5.963561302740449e-05, "loss": 10.3315, "step": 81 }, { "epoch": 1.4304347826086956, "grad_norm": 0.047879792749881744, "learning_rate": 5.868240888334653e-05, "loss": 10.3816, "step": 82 }, { "epoch": 1.4478260869565218, "grad_norm": 0.03946797922253609, "learning_rate": 5.772593964039203e-05, "loss": 10.3688, "step": 83 }, { "epoch": 1.4652173913043478, "grad_norm": 0.04519697278738022, "learning_rate": 5.6766564987506566e-05, "loss": 10.332, "step": 84 }, { "epoch": 1.482608695652174, "grad_norm": 0.05987326055765152, "learning_rate": 5.5804645706261514e-05, "loss": 10.6567, "step": 85 }, { "epoch": 1.5, "grad_norm": 0.06895023584365845, "learning_rate": 5.484054353515896e-05, "loss": 10.1029, "step": 86 }, { "epoch": 1.517391304347826, "grad_norm": 0.04168372601270676, "learning_rate": 5.387462103359655e-05, "loss": 10.2933, "step": 87 }, { "epoch": 1.5347826086956522, "grad_norm": 0.05664673075079918, "learning_rate": 5.290724144552379e-05, "loss": 10.3656, "step": 88 }, { "epoch": 1.5521739130434784, "grad_norm": 0.044364169239997864, "learning_rate": 5.193876856284085e-05, "loss": 10.3533, "step": 89 }, { "epoch": 1.5695652173913044, "grad_norm": 0.03303051367402077, "learning_rate": 5.096956658859122e-05, "loss": 10.3517, "step": 90 }, { "epoch": 1.5869565217391304, "grad_norm": 0.03159893676638603, "learning_rate": 5e-05, "loss": 10.3686, "step": 91 }, { "epoch": 1.6043478260869564, "grad_norm": 0.03333815187215805, "learning_rate": 4.903043341140879e-05, "loss": 10.3937, "step": 92 }, { "epoch": 1.6217391304347826, "grad_norm": 0.031498417258262634, "learning_rate": 4.806123143715916e-05, "loss": 10.3292, "step": 93 }, { "epoch": 1.6391304347826088, "grad_norm": 0.04209763929247856, "learning_rate": 4.709275855447621e-05, "loss": 10.3423, "step": 94 }, { "epoch": 1.6565217391304348, "grad_norm": 0.0396493598818779, "learning_rate": 4.612537896640346e-05, "loss": 10.3585, "step": 95 }, { "epoch": 1.6739130434782608, "grad_norm": 0.04663508012890816, "learning_rate": 4.515945646484105e-05, "loss": 10.3571, "step": 96 }, { "epoch": 1.691304347826087, "grad_norm": 0.035037729889154434, "learning_rate": 4.4195354293738484e-05, "loss": 10.3413, "step": 97 }, { "epoch": 1.7086956521739132, "grad_norm": 0.05645934119820595, "learning_rate": 4.323343501249346e-05, "loss": 10.4193, "step": 98 }, { "epoch": 1.7260869565217392, "grad_norm": 0.04999447241425514, "learning_rate": 4.227406035960798e-05, "loss": 10.4981, "step": 99 }, { "epoch": 1.7434782608695651, "grad_norm": 0.06257407367229462, "learning_rate": 4.131759111665349e-05, "loss": 10.2465, "step": 100 }, { "epoch": 1.7434782608695651, "eval_loss": 10.356426239013672, "eval_runtime": 0.2354, "eval_samples_per_second": 412.137, "eval_steps_per_second": 106.221, "step": 100 }, { "epoch": 1.7608695652173914, "grad_norm": 0.04908235743641853, "learning_rate": 4.036438697259551e-05, "loss": 10.2902, "step": 101 }, { "epoch": 1.7782608695652173, "grad_norm": 0.02860858105123043, "learning_rate": 3.941480638852948e-05, "loss": 10.3516, "step": 102 }, { "epoch": 1.7956521739130435, "grad_norm": 0.031241578981280327, "learning_rate": 3.846920646287799e-05, "loss": 10.3411, "step": 103 }, { "epoch": 1.8130434782608695, "grad_norm": 0.0567951574921608, "learning_rate": 3.752794279710094e-05, "loss": 10.3461, "step": 104 }, { "epoch": 1.8304347826086955, "grad_norm": 0.026230156421661377, "learning_rate": 3.6591369361968124e-05, "loss": 10.3949, "step": 105 }, { "epoch": 1.8478260869565217, "grad_norm": 0.03171601891517639, "learning_rate": 3.5659838364445505e-05, "loss": 10.3279, "step": 106 }, { "epoch": 1.865217391304348, "grad_norm": 0.035893335938453674, "learning_rate": 3.473370011524435e-05, "loss": 10.3625, "step": 107 }, { "epoch": 1.882608695652174, "grad_norm": 0.042509518563747406, "learning_rate": 3.381330289708396e-05, "loss": 10.3694, "step": 108 }, { "epoch": 1.9, "grad_norm": 0.05423169583082199, "learning_rate": 3.289899283371657e-05, "loss": 10.3531, "step": 109 }, { "epoch": 1.9173913043478261, "grad_norm": 0.02653021737933159, "learning_rate": 3.199111375976449e-05, "loss": 10.3761, "step": 110 }, { "epoch": 1.9347826086956523, "grad_norm": 0.0314241424202919, "learning_rate": 3.109000709141788e-05, "loss": 10.3185, "step": 111 }, { "epoch": 1.9521739130434783, "grad_norm": 0.02957828901708126, "learning_rate": 3.019601169804216e-05, "loss": 10.3912, "step": 112 }, { "epoch": 1.9695652173913043, "grad_norm": 0.05682424083352089, "learning_rate": 2.9309463774743046e-05, "loss": 10.5149, "step": 113 }, { "epoch": 1.9869565217391303, "grad_norm": 0.04566289857029915, "learning_rate": 2.8430696715937337e-05, "loss": 10.3326, "step": 114 }, { "epoch": 2.008695652173913, "grad_norm": 0.04557787626981735, "learning_rate": 2.7560040989976892e-05, "loss": 18.0535, "step": 115 }, { "epoch": 2.026086956521739, "grad_norm": 0.03891031816601753, "learning_rate": 2.6697824014873075e-05, "loss": 10.2499, "step": 116 }, { "epoch": 2.0434782608695654, "grad_norm": 0.03375143185257912, "learning_rate": 2.5844370035168073e-05, "loss": 10.3784, "step": 117 }, { "epoch": 2.0608695652173914, "grad_norm": 0.034450430423021317, "learning_rate": 2.500000000000001e-05, "loss": 10.3636, "step": 118 }, { "epoch": 2.0782608695652174, "grad_norm": 0.04428379610180855, "learning_rate": 2.4165031442406855e-05, "loss": 10.3354, "step": 119 }, { "epoch": 2.0956521739130434, "grad_norm": 0.03167899325489998, "learning_rate": 2.333977835991545e-05, "loss": 10.3506, "step": 120 }, { "epoch": 2.1130434782608694, "grad_norm": 0.030559729784727097, "learning_rate": 2.25245510964597e-05, "loss": 10.4021, "step": 121 }, { "epoch": 2.130434782608696, "grad_norm": 0.02466226927936077, "learning_rate": 2.171965622567308e-05, "loss": 10.2999, "step": 122 }, { "epoch": 2.1478260869565218, "grad_norm": 0.03233836963772774, "learning_rate": 2.0925396435598664e-05, "loss": 10.3807, "step": 123 }, { "epoch": 2.1652173913043478, "grad_norm": 0.05373305082321167, "learning_rate": 2.0142070414860704e-05, "loss": 10.3485, "step": 124 }, { "epoch": 2.1826086956521737, "grad_norm": 0.042647767812013626, "learning_rate": 1.936997274033986e-05, "loss": 10.3668, "step": 125 }, { "epoch": 2.2, "grad_norm": 0.036346666514873505, "learning_rate": 1.8609393766395085e-05, "loss": 10.3566, "step": 126 }, { "epoch": 2.217391304347826, "grad_norm": 0.03858176991343498, "learning_rate": 1.7860619515673033e-05, "loss": 10.4032, "step": 127 }, { "epoch": 2.234782608695652, "grad_norm": 0.059903256595134735, "learning_rate": 1.7123931571546827e-05, "loss": 10.5719, "step": 128 }, { "epoch": 2.252173913043478, "grad_norm": 0.06582050770521164, "learning_rate": 1.639960697222388e-05, "loss": 10.2509, "step": 129 }, { "epoch": 2.269565217391304, "grad_norm": 0.0548245906829834, "learning_rate": 1.5687918106563326e-05, "loss": 10.2626, "step": 130 }, { "epoch": 2.2869565217391306, "grad_norm": 0.04182303696870804, "learning_rate": 1.4989132611641576e-05, "loss": 10.2849, "step": 131 }, { "epoch": 2.3043478260869565, "grad_norm": 0.03637641668319702, "learning_rate": 1.4303513272105057e-05, "loss": 10.3165, "step": 132 }, { "epoch": 2.3217391304347825, "grad_norm": 0.043039511889219284, "learning_rate": 1.3631317921347563e-05, "loss": 10.4185, "step": 133 }, { "epoch": 2.3391304347826085, "grad_norm": 0.045653752982616425, "learning_rate": 1.297279934454978e-05, "loss": 10.3653, "step": 134 }, { "epoch": 2.356521739130435, "grad_norm": 0.030604032799601555, "learning_rate": 1.2328205183616965e-05, "loss": 10.319, "step": 135 }, { "epoch": 2.373913043478261, "grad_norm": 0.04073254391551018, "learning_rate": 1.1697777844051105e-05, "loss": 10.3873, "step": 136 }, { "epoch": 2.391304347826087, "grad_norm": 0.035167474299669266, "learning_rate": 1.1081754403791999e-05, "loss": 10.3317, "step": 137 }, { "epoch": 2.408695652173913, "grad_norm": 0.037993211299180984, "learning_rate": 1.0480366524062042e-05, "loss": 10.3626, "step": 138 }, { "epoch": 2.426086956521739, "grad_norm": 0.04481872171163559, "learning_rate": 9.893840362247809e-06, "loss": 10.3341, "step": 139 }, { "epoch": 2.4434782608695653, "grad_norm": 0.027508754283189774, "learning_rate": 9.322396486851626e-06, "loss": 10.3795, "step": 140 }, { "epoch": 2.4608695652173913, "grad_norm": 0.03096911311149597, "learning_rate": 8.766249794544662e-06, "loss": 10.3611, "step": 141 }, { "epoch": 2.4782608695652173, "grad_norm": 0.0561353974044323, "learning_rate": 8.225609429353187e-06, "loss": 10.5768, "step": 142 }, { "epoch": 2.4956521739130437, "grad_norm": 0.04231831058859825, "learning_rate": 7.700678704007947e-06, "loss": 10.2581, "step": 143 }, { "epoch": 2.5130434782608697, "grad_norm": 0.04914220795035362, "learning_rate": 7.191655023486682e-06, "loss": 10.2604, "step": 144 }, { "epoch": 2.5304347826086957, "grad_norm": 0.0375588983297348, "learning_rate": 6.698729810778065e-06, "loss": 10.3207, "step": 145 }, { "epoch": 2.5478260869565217, "grad_norm": 0.04075303673744202, "learning_rate": 6.222088434895462e-06, "loss": 10.3565, "step": 146 }, { "epoch": 2.5652173913043477, "grad_norm": 0.04066910222172737, "learning_rate": 5.7619101411671095e-06, "loss": 10.3406, "step": 147 }, { "epoch": 2.5826086956521737, "grad_norm": 0.02909262664616108, "learning_rate": 5.318367983829392e-06, "loss": 10.354, "step": 148 }, { "epoch": 2.6, "grad_norm": 0.03126854449510574, "learning_rate": 4.891628760948114e-06, "loss": 10.3359, "step": 149 }, { "epoch": 2.617391304347826, "grad_norm": 0.03613854572176933, "learning_rate": 4.4818529516926726e-06, "loss": 10.4205, "step": 150 }, { "epoch": 2.617391304347826, "eval_loss": 10.355819702148438, "eval_runtime": 0.2337, "eval_samples_per_second": 415.109, "eval_steps_per_second": 106.987, "step": 150 }, { "epoch": 2.634782608695652, "grad_norm": 0.03242945298552513, "learning_rate": 4.089194655986306e-06, "loss": 10.2923, "step": 151 }, { "epoch": 2.6521739130434785, "grad_norm": 0.04083726182579994, "learning_rate": 3.7138015365554833e-06, "loss": 10.3738, "step": 152 }, { "epoch": 2.6695652173913045, "grad_norm": 0.024881916120648384, "learning_rate": 3.3558147633999728e-06, "loss": 10.3584, "step": 153 }, { "epoch": 2.6869565217391305, "grad_norm": 0.02501659281551838, "learning_rate": 3.0153689607045845e-06, "loss": 10.3911, "step": 154 }, { "epoch": 2.7043478260869565, "grad_norm": 0.04259592667222023, "learning_rate": 2.692592156212487e-06, "loss": 10.3425, "step": 155 }, { "epoch": 2.7217391304347824, "grad_norm": 0.05039789155125618, "learning_rate": 2.3876057330792346e-06, "loss": 10.5843, "step": 156 }, { "epoch": 2.7391304347826084, "grad_norm": 0.0602952279150486, "learning_rate": 2.100524384225555e-06, "loss": 10.2527, "step": 157 }, { "epoch": 2.756521739130435, "grad_norm": 0.055879686027765274, "learning_rate": 1.8314560692059835e-06, "loss": 10.2296, "step": 158 }, { "epoch": 2.773913043478261, "grad_norm": 0.05151304230093956, "learning_rate": 1.5805019736097104e-06, "loss": 10.3434, "step": 159 }, { "epoch": 2.791304347826087, "grad_norm": 0.031146962195634842, "learning_rate": 1.3477564710088098e-06, "loss": 10.3266, "step": 160 }, { "epoch": 2.8086956521739133, "grad_norm": 0.03667284548282623, "learning_rate": 1.1333070874682216e-06, "loss": 10.3912, "step": 161 }, { "epoch": 2.8260869565217392, "grad_norm": 0.03156570345163345, "learning_rate": 9.372344686307655e-07, "loss": 10.374, "step": 162 }, { "epoch": 2.8434782608695652, "grad_norm": 0.052136778831481934, "learning_rate": 7.596123493895991e-07, "loss": 10.2962, "step": 163 }, { "epoch": 2.860869565217391, "grad_norm": 0.04135267063975334, "learning_rate": 6.005075261595494e-07, "loss": 10.3693, "step": 164 }, { "epoch": 2.878260869565217, "grad_norm": 0.06279294937849045, "learning_rate": 4.5997983175773417e-07, "loss": 10.3712, "step": 165 }, { "epoch": 2.8956521739130436, "grad_norm": 0.04232044145464897, "learning_rate": 3.380821129028489e-07, "loss": 10.3312, "step": 166 }, { "epoch": 2.9130434782608696, "grad_norm": 0.047194790095090866, "learning_rate": 2.3486021034170857e-07, "loss": 10.3911, "step": 167 }, { "epoch": 2.9304347826086956, "grad_norm": 0.03097592294216156, "learning_rate": 1.503529416103988e-07, "loss": 10.3411, "step": 168 }, { "epoch": 2.9478260869565216, "grad_norm": 0.04218338429927826, "learning_rate": 8.459208643659122e-08, "loss": 10.3954, "step": 169 }, { "epoch": 2.965217391304348, "grad_norm": 0.048050738871097565, "learning_rate": 3.760237478849793e-08, "loss": 10.5368, "step": 170 }, { "epoch": 2.982608695652174, "grad_norm": 0.054054055362939835, "learning_rate": 9.401477574932926e-09, "loss": 10.3503, "step": 171 }, { "epoch": 3.0043478260869567, "grad_norm": 0.06945475935935974, "learning_rate": 0.0, "loss": 18.0215, "step": 172 } ], "logging_steps": 1, "max_steps": 172, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 19186713624576.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }