{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 2164, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009242144177449168, "grad_norm": 61.5, "learning_rate": 1.8348623853211012e-07, "loss": 3.1195, "step": 1 }, { "epoch": 0.0018484288354898336, "grad_norm": 61.5, "learning_rate": 3.6697247706422023e-07, "loss": 3.2083, "step": 2 }, { "epoch": 0.0027726432532347504, "grad_norm": 62.75, "learning_rate": 5.504587155963304e-07, "loss": 3.1632, "step": 3 }, { "epoch": 0.0036968576709796672, "grad_norm": 61.25, "learning_rate": 7.339449541284405e-07, "loss": 3.0885, "step": 4 }, { "epoch": 0.0046210720887245845, "grad_norm": 63.5, "learning_rate": 9.174311926605506e-07, "loss": 3.1428, "step": 5 }, { "epoch": 0.005545286506469501, "grad_norm": 57.75, "learning_rate": 1.1009174311926608e-06, "loss": 3.0217, "step": 6 }, { "epoch": 0.006469500924214418, "grad_norm": 56.75, "learning_rate": 1.2844036697247707e-06, "loss": 3.0014, "step": 7 }, { "epoch": 0.0073937153419593345, "grad_norm": 57.75, "learning_rate": 1.467889908256881e-06, "loss": 2.9919, "step": 8 }, { "epoch": 0.00831792975970425, "grad_norm": 56.5, "learning_rate": 1.6513761467889911e-06, "loss": 2.9574, "step": 9 }, { "epoch": 0.009242144177449169, "grad_norm": 56.75, "learning_rate": 1.8348623853211011e-06, "loss": 2.971, "step": 10 }, { "epoch": 0.010166358595194085, "grad_norm": 62.0, "learning_rate": 2.0183486238532113e-06, "loss": 3.1683, "step": 11 }, { "epoch": 0.011090573012939002, "grad_norm": 55.75, "learning_rate": 2.2018348623853215e-06, "loss": 2.9952, "step": 12 }, { "epoch": 0.012014787430683918, "grad_norm": 54.75, "learning_rate": 2.3853211009174317e-06, "loss": 2.9961, "step": 13 }, { "epoch": 0.012939001848428836, "grad_norm": 54.0, "learning_rate": 2.5688073394495415e-06, "loss": 2.9625, "step": 14 }, { "epoch": 0.013863216266173753, "grad_norm": 55.5, "learning_rate": 2.7522935779816517e-06, "loss": 3.0311, "step": 15 }, { "epoch": 0.014787430683918669, "grad_norm": 45.5, "learning_rate": 2.935779816513762e-06, "loss": 2.8209, "step": 16 }, { "epoch": 0.015711645101663587, "grad_norm": 46.75, "learning_rate": 3.119266055045872e-06, "loss": 2.8857, "step": 17 }, { "epoch": 0.0166358595194085, "grad_norm": 40.5, "learning_rate": 3.3027522935779823e-06, "loss": 2.7243, "step": 18 }, { "epoch": 0.01756007393715342, "grad_norm": 37.0, "learning_rate": 3.486238532110092e-06, "loss": 2.605, "step": 19 }, { "epoch": 0.018484288354898338, "grad_norm": 36.0, "learning_rate": 3.6697247706422022e-06, "loss": 2.6036, "step": 20 }, { "epoch": 0.019408502772643253, "grad_norm": 39.0, "learning_rate": 3.853211009174313e-06, "loss": 2.6174, "step": 21 }, { "epoch": 0.02033271719038817, "grad_norm": 26.125, "learning_rate": 4.036697247706423e-06, "loss": 2.4368, "step": 22 }, { "epoch": 0.021256931608133085, "grad_norm": 26.75, "learning_rate": 4.220183486238532e-06, "loss": 2.4722, "step": 23 }, { "epoch": 0.022181146025878003, "grad_norm": 22.75, "learning_rate": 4.403669724770643e-06, "loss": 2.4402, "step": 24 }, { "epoch": 0.02310536044362292, "grad_norm": 18.875, "learning_rate": 4.587155963302753e-06, "loss": 2.458, "step": 25 }, { "epoch": 0.024029574861367836, "grad_norm": 18.625, "learning_rate": 4.770642201834863e-06, "loss": 2.332, "step": 26 }, { "epoch": 0.024953789279112754, "grad_norm": 13.1875, "learning_rate": 4.954128440366973e-06, "loss": 2.2852, "step": 27 }, { "epoch": 0.025878003696857672, "grad_norm": 11.9375, "learning_rate": 5.137614678899083e-06, "loss": 2.3154, "step": 28 }, { "epoch": 0.026802218114602587, "grad_norm": 10.5625, "learning_rate": 5.3211009174311936e-06, "loss": 2.3205, "step": 29 }, { "epoch": 0.027726432532347505, "grad_norm": 8.5, "learning_rate": 5.504587155963303e-06, "loss": 2.3319, "step": 30 }, { "epoch": 0.02865064695009242, "grad_norm": 8.875, "learning_rate": 5.688073394495414e-06, "loss": 2.2993, "step": 31 }, { "epoch": 0.029574861367837338, "grad_norm": 7.6875, "learning_rate": 5.871559633027524e-06, "loss": 2.3397, "step": 32 }, { "epoch": 0.030499075785582256, "grad_norm": 7.03125, "learning_rate": 6.0550458715596335e-06, "loss": 2.1804, "step": 33 }, { "epoch": 0.031423290203327174, "grad_norm": 11.625, "learning_rate": 6.238532110091744e-06, "loss": 2.1016, "step": 34 }, { "epoch": 0.03234750462107209, "grad_norm": 18.875, "learning_rate": 6.422018348623854e-06, "loss": 2.1097, "step": 35 }, { "epoch": 0.033271719038817, "grad_norm": 5.6875, "learning_rate": 6.6055045871559645e-06, "loss": 2.0425, "step": 36 }, { "epoch": 0.034195933456561925, "grad_norm": 4.9375, "learning_rate": 6.788990825688074e-06, "loss": 2.1982, "step": 37 }, { "epoch": 0.03512014787430684, "grad_norm": 5.09375, "learning_rate": 6.972477064220184e-06, "loss": 2.1257, "step": 38 }, { "epoch": 0.036044362292051754, "grad_norm": 4.5625, "learning_rate": 7.155963302752295e-06, "loss": 2.1028, "step": 39 }, { "epoch": 0.036968576709796676, "grad_norm": 4.40625, "learning_rate": 7.3394495412844045e-06, "loss": 2.0811, "step": 40 }, { "epoch": 0.03789279112754159, "grad_norm": 6.09375, "learning_rate": 7.522935779816515e-06, "loss": 2.1204, "step": 41 }, { "epoch": 0.038817005545286505, "grad_norm": 4.09375, "learning_rate": 7.706422018348626e-06, "loss": 2.0407, "step": 42 }, { "epoch": 0.03974121996303143, "grad_norm": 3.875, "learning_rate": 7.889908256880735e-06, "loss": 2.0729, "step": 43 }, { "epoch": 0.04066543438077634, "grad_norm": 4.1875, "learning_rate": 8.073394495412845e-06, "loss": 2.0598, "step": 44 }, { "epoch": 0.041589648798521256, "grad_norm": 3.609375, "learning_rate": 8.256880733944956e-06, "loss": 2.0645, "step": 45 }, { "epoch": 0.04251386321626617, "grad_norm": 3.390625, "learning_rate": 8.440366972477065e-06, "loss": 2.0481, "step": 46 }, { "epoch": 0.04343807763401109, "grad_norm": 3.5, "learning_rate": 8.623853211009175e-06, "loss": 2.0675, "step": 47 }, { "epoch": 0.04436229205175601, "grad_norm": 3.234375, "learning_rate": 8.807339449541286e-06, "loss": 2.0347, "step": 48 }, { "epoch": 0.04528650646950092, "grad_norm": 3.640625, "learning_rate": 8.990825688073395e-06, "loss": 2.024, "step": 49 }, { "epoch": 0.04621072088724584, "grad_norm": 3.359375, "learning_rate": 9.174311926605506e-06, "loss": 1.9933, "step": 50 }, { "epoch": 0.04713493530499076, "grad_norm": 3.171875, "learning_rate": 9.357798165137616e-06, "loss": 1.9816, "step": 51 }, { "epoch": 0.04805914972273567, "grad_norm": 4.65625, "learning_rate": 9.541284403669727e-06, "loss": 2.0251, "step": 52 }, { "epoch": 0.048983364140480594, "grad_norm": 3.171875, "learning_rate": 9.724770642201836e-06, "loss": 2.1156, "step": 53 }, { "epoch": 0.04990757855822551, "grad_norm": 3.109375, "learning_rate": 9.908256880733946e-06, "loss": 2.0297, "step": 54 }, { "epoch": 0.05083179297597042, "grad_norm": 3.078125, "learning_rate": 1.0091743119266055e-05, "loss": 1.9833, "step": 55 }, { "epoch": 0.051756007393715345, "grad_norm": 3.078125, "learning_rate": 1.0275229357798166e-05, "loss": 2.0087, "step": 56 }, { "epoch": 0.05268022181146026, "grad_norm": 2.96875, "learning_rate": 1.0458715596330275e-05, "loss": 2.0128, "step": 57 }, { "epoch": 0.053604436229205174, "grad_norm": 3.0625, "learning_rate": 1.0642201834862387e-05, "loss": 1.9904, "step": 58 }, { "epoch": 0.054528650646950096, "grad_norm": 3.078125, "learning_rate": 1.0825688073394496e-05, "loss": 2.0955, "step": 59 }, { "epoch": 0.05545286506469501, "grad_norm": 3.046875, "learning_rate": 1.1009174311926607e-05, "loss": 2.023, "step": 60 }, { "epoch": 0.056377079482439925, "grad_norm": 3.046875, "learning_rate": 1.1192660550458716e-05, "loss": 2.0217, "step": 61 }, { "epoch": 0.05730129390018484, "grad_norm": 2.96875, "learning_rate": 1.1376146788990828e-05, "loss": 1.9281, "step": 62 }, { "epoch": 0.05822550831792976, "grad_norm": 2.9375, "learning_rate": 1.1559633027522937e-05, "loss": 2.0056, "step": 63 }, { "epoch": 0.059149722735674676, "grad_norm": 3.046875, "learning_rate": 1.1743119266055047e-05, "loss": 2.021, "step": 64 }, { "epoch": 0.06007393715341959, "grad_norm": 3.0, "learning_rate": 1.1926605504587156e-05, "loss": 1.9854, "step": 65 }, { "epoch": 0.06099815157116451, "grad_norm": 3.109375, "learning_rate": 1.2110091743119267e-05, "loss": 2.0083, "step": 66 }, { "epoch": 0.06192236598890943, "grad_norm": 2.953125, "learning_rate": 1.2293577981651376e-05, "loss": 1.9579, "step": 67 }, { "epoch": 0.06284658040665435, "grad_norm": 2.984375, "learning_rate": 1.2477064220183488e-05, "loss": 1.9632, "step": 68 }, { "epoch": 0.06377079482439926, "grad_norm": 3.125, "learning_rate": 1.2660550458715597e-05, "loss": 1.9839, "step": 69 }, { "epoch": 0.06469500924214418, "grad_norm": 3.03125, "learning_rate": 1.2844036697247708e-05, "loss": 2.0081, "step": 70 }, { "epoch": 0.06561922365988909, "grad_norm": 3.0, "learning_rate": 1.3027522935779817e-05, "loss": 1.8978, "step": 71 }, { "epoch": 0.066543438077634, "grad_norm": 2.984375, "learning_rate": 1.3211009174311929e-05, "loss": 1.9977, "step": 72 }, { "epoch": 0.06746765249537892, "grad_norm": 2.953125, "learning_rate": 1.3394495412844038e-05, "loss": 1.9309, "step": 73 }, { "epoch": 0.06839186691312385, "grad_norm": 3.09375, "learning_rate": 1.3577981651376149e-05, "loss": 1.9731, "step": 74 }, { "epoch": 0.06931608133086876, "grad_norm": 2.953125, "learning_rate": 1.3761467889908258e-05, "loss": 1.8775, "step": 75 }, { "epoch": 0.07024029574861368, "grad_norm": 3.125, "learning_rate": 1.3944954128440368e-05, "loss": 1.884, "step": 76 }, { "epoch": 0.0711645101663586, "grad_norm": 3.09375, "learning_rate": 1.4128440366972477e-05, "loss": 1.9861, "step": 77 }, { "epoch": 0.07208872458410351, "grad_norm": 3.1875, "learning_rate": 1.431192660550459e-05, "loss": 1.9244, "step": 78 }, { "epoch": 0.07301293900184842, "grad_norm": 2.953125, "learning_rate": 1.4495412844036698e-05, "loss": 1.9323, "step": 79 }, { "epoch": 0.07393715341959335, "grad_norm": 3.015625, "learning_rate": 1.4678899082568809e-05, "loss": 1.887, "step": 80 }, { "epoch": 0.07486136783733827, "grad_norm": 3.09375, "learning_rate": 1.4862385321100918e-05, "loss": 1.928, "step": 81 }, { "epoch": 0.07578558225508318, "grad_norm": 3.203125, "learning_rate": 1.504587155963303e-05, "loss": 1.8454, "step": 82 }, { "epoch": 0.0767097966728281, "grad_norm": 3.03125, "learning_rate": 1.5229357798165139e-05, "loss": 1.9323, "step": 83 }, { "epoch": 0.07763401109057301, "grad_norm": 2.953125, "learning_rate": 1.541284403669725e-05, "loss": 1.8914, "step": 84 }, { "epoch": 0.07855822550831792, "grad_norm": 2.90625, "learning_rate": 1.559633027522936e-05, "loss": 1.7869, "step": 85 }, { "epoch": 0.07948243992606285, "grad_norm": 2.90625, "learning_rate": 1.577981651376147e-05, "loss": 1.9624, "step": 86 }, { "epoch": 0.08040665434380777, "grad_norm": 2.78125, "learning_rate": 1.5963302752293578e-05, "loss": 1.9335, "step": 87 }, { "epoch": 0.08133086876155268, "grad_norm": 2.71875, "learning_rate": 1.614678899082569e-05, "loss": 1.8357, "step": 88 }, { "epoch": 0.0822550831792976, "grad_norm": 2.921875, "learning_rate": 1.63302752293578e-05, "loss": 1.8874, "step": 89 }, { "epoch": 0.08317929759704251, "grad_norm": 2.765625, "learning_rate": 1.6513761467889912e-05, "loss": 1.9741, "step": 90 }, { "epoch": 0.08410351201478743, "grad_norm": 2.65625, "learning_rate": 1.669724770642202e-05, "loss": 1.8833, "step": 91 }, { "epoch": 0.08502772643253234, "grad_norm": 2.96875, "learning_rate": 1.688073394495413e-05, "loss": 1.8588, "step": 92 }, { "epoch": 0.08595194085027727, "grad_norm": 2.703125, "learning_rate": 1.706422018348624e-05, "loss": 1.9266, "step": 93 }, { "epoch": 0.08687615526802218, "grad_norm": 2.578125, "learning_rate": 1.724770642201835e-05, "loss": 1.8432, "step": 94 }, { "epoch": 0.0878003696857671, "grad_norm": 2.9375, "learning_rate": 1.743119266055046e-05, "loss": 1.9043, "step": 95 }, { "epoch": 0.08872458410351201, "grad_norm": 2.546875, "learning_rate": 1.7614678899082572e-05, "loss": 1.8099, "step": 96 }, { "epoch": 0.08964879852125693, "grad_norm": 2.53125, "learning_rate": 1.779816513761468e-05, "loss": 1.8868, "step": 97 }, { "epoch": 0.09057301293900184, "grad_norm": 2.828125, "learning_rate": 1.798165137614679e-05, "loss": 1.8583, "step": 98 }, { "epoch": 0.09149722735674677, "grad_norm": 2.734375, "learning_rate": 1.81651376146789e-05, "loss": 1.881, "step": 99 }, { "epoch": 0.09242144177449169, "grad_norm": 2.671875, "learning_rate": 1.834862385321101e-05, "loss": 1.8457, "step": 100 }, { "epoch": 0.0933456561922366, "grad_norm": 2.65625, "learning_rate": 1.853211009174312e-05, "loss": 1.8356, "step": 101 }, { "epoch": 0.09426987060998152, "grad_norm": 2.546875, "learning_rate": 1.8715596330275232e-05, "loss": 1.8369, "step": 102 }, { "epoch": 0.09519408502772643, "grad_norm": 2.703125, "learning_rate": 1.889908256880734e-05, "loss": 1.9394, "step": 103 }, { "epoch": 0.09611829944547134, "grad_norm": 2.453125, "learning_rate": 1.9082568807339454e-05, "loss": 1.8417, "step": 104 }, { "epoch": 0.09704251386321626, "grad_norm": 2.65625, "learning_rate": 1.9266055045871563e-05, "loss": 1.8609, "step": 105 }, { "epoch": 0.09796672828096119, "grad_norm": 2.53125, "learning_rate": 1.944954128440367e-05, "loss": 1.8663, "step": 106 }, { "epoch": 0.0988909426987061, "grad_norm": 2.65625, "learning_rate": 1.963302752293578e-05, "loss": 1.8382, "step": 107 }, { "epoch": 0.09981515711645102, "grad_norm": 2.625, "learning_rate": 1.9816513761467893e-05, "loss": 1.854, "step": 108 }, { "epoch": 0.10073937153419593, "grad_norm": 2.515625, "learning_rate": 2e-05, "loss": 1.7959, "step": 109 }, { "epoch": 0.10166358595194085, "grad_norm": 2.640625, "learning_rate": 1.999998831453463e-05, "loss": 1.8384, "step": 110 }, { "epoch": 0.10258780036968576, "grad_norm": 2.625, "learning_rate": 1.9999953258165833e-05, "loss": 1.8335, "step": 111 }, { "epoch": 0.10351201478743069, "grad_norm": 2.578125, "learning_rate": 1.9999894830975532e-05, "loss": 1.8598, "step": 112 }, { "epoch": 0.1044362292051756, "grad_norm": 2.546875, "learning_rate": 1.999981303310028e-05, "loss": 1.7008, "step": 113 }, { "epoch": 0.10536044362292052, "grad_norm": 2.53125, "learning_rate": 1.9999707864731247e-05, "loss": 1.8535, "step": 114 }, { "epoch": 0.10628465804066543, "grad_norm": 2.515625, "learning_rate": 1.9999579326114224e-05, "loss": 1.8864, "step": 115 }, { "epoch": 0.10720887245841035, "grad_norm": 2.625, "learning_rate": 1.9999427417549612e-05, "loss": 1.8675, "step": 116 }, { "epoch": 0.10813308687615526, "grad_norm": 2.609375, "learning_rate": 1.999925213939244e-05, "loss": 1.8583, "step": 117 }, { "epoch": 0.10905730129390019, "grad_norm": 2.46875, "learning_rate": 1.9999053492052347e-05, "loss": 1.8889, "step": 118 }, { "epoch": 0.1099815157116451, "grad_norm": 2.765625, "learning_rate": 1.999883147599359e-05, "loss": 1.8804, "step": 119 }, { "epoch": 0.11090573012939002, "grad_norm": 2.609375, "learning_rate": 1.9998586091735044e-05, "loss": 1.8546, "step": 120 }, { "epoch": 0.11182994454713494, "grad_norm": 2.609375, "learning_rate": 1.9998317339850192e-05, "loss": 1.8745, "step": 121 }, { "epoch": 0.11275415896487985, "grad_norm": 2.65625, "learning_rate": 1.9998025220967134e-05, "loss": 1.7908, "step": 122 }, { "epoch": 0.11367837338262476, "grad_norm": 2.546875, "learning_rate": 1.9997709735768578e-05, "loss": 1.8669, "step": 123 }, { "epoch": 0.11460258780036968, "grad_norm": 2.5, "learning_rate": 1.9997370884991842e-05, "loss": 1.8663, "step": 124 }, { "epoch": 0.11552680221811461, "grad_norm": 2.53125, "learning_rate": 1.999700866942885e-05, "loss": 1.8389, "step": 125 }, { "epoch": 0.11645101663585952, "grad_norm": 2.578125, "learning_rate": 1.9996623089926138e-05, "loss": 1.864, "step": 126 }, { "epoch": 0.11737523105360444, "grad_norm": 2.390625, "learning_rate": 1.999621414738484e-05, "loss": 1.8493, "step": 127 }, { "epoch": 0.11829944547134935, "grad_norm": 2.5, "learning_rate": 1.999578184276069e-05, "loss": 1.8744, "step": 128 }, { "epoch": 0.11922365988909427, "grad_norm": 2.484375, "learning_rate": 1.999532617706403e-05, "loss": 1.8266, "step": 129 }, { "epoch": 0.12014787430683918, "grad_norm": 2.53125, "learning_rate": 1.9994847151359786e-05, "loss": 1.8983, "step": 130 }, { "epoch": 0.12107208872458411, "grad_norm": 2.375, "learning_rate": 1.9994344766767485e-05, "loss": 1.8332, "step": 131 }, { "epoch": 0.12199630314232902, "grad_norm": 2.40625, "learning_rate": 1.9993819024461255e-05, "loss": 1.7669, "step": 132 }, { "epoch": 0.12292051756007394, "grad_norm": 2.578125, "learning_rate": 1.9993269925669798e-05, "loss": 1.8584, "step": 133 }, { "epoch": 0.12384473197781885, "grad_norm": 2.640625, "learning_rate": 1.9992697471676413e-05, "loss": 1.8922, "step": 134 }, { "epoch": 0.12476894639556377, "grad_norm": 2.546875, "learning_rate": 1.9992101663818976e-05, "loss": 1.9421, "step": 135 }, { "epoch": 0.1256931608133087, "grad_norm": 2.828125, "learning_rate": 1.999148250348994e-05, "loss": 1.921, "step": 136 }, { "epoch": 0.1266173752310536, "grad_norm": 2.640625, "learning_rate": 1.9990839992136354e-05, "loss": 1.801, "step": 137 }, { "epoch": 0.12754158964879853, "grad_norm": 2.59375, "learning_rate": 1.9990174131259813e-05, "loss": 1.7953, "step": 138 }, { "epoch": 0.12846580406654343, "grad_norm": 2.546875, "learning_rate": 1.9989484922416503e-05, "loss": 1.8359, "step": 139 }, { "epoch": 0.12939001848428835, "grad_norm": 2.5, "learning_rate": 1.998877236721717e-05, "loss": 1.8894, "step": 140 }, { "epoch": 0.13031423290203328, "grad_norm": 2.890625, "learning_rate": 1.9988036467327116e-05, "loss": 1.9296, "step": 141 }, { "epoch": 0.13123844731977818, "grad_norm": 2.4375, "learning_rate": 1.9987277224466216e-05, "loss": 1.7654, "step": 142 }, { "epoch": 0.1321626617375231, "grad_norm": 2.453125, "learning_rate": 1.998649464040888e-05, "loss": 1.8479, "step": 143 }, { "epoch": 0.133086876155268, "grad_norm": 2.484375, "learning_rate": 1.998568871698409e-05, "loss": 1.9108, "step": 144 }, { "epoch": 0.13401109057301294, "grad_norm": 2.53125, "learning_rate": 1.998485945607536e-05, "loss": 1.8495, "step": 145 }, { "epoch": 0.13493530499075784, "grad_norm": 2.375, "learning_rate": 1.998400685962075e-05, "loss": 1.7533, "step": 146 }, { "epoch": 0.13585951940850277, "grad_norm": 2.421875, "learning_rate": 1.998313092961286e-05, "loss": 1.8244, "step": 147 }, { "epoch": 0.1367837338262477, "grad_norm": 2.4375, "learning_rate": 1.9982231668098814e-05, "loss": 1.835, "step": 148 }, { "epoch": 0.1377079482439926, "grad_norm": 2.40625, "learning_rate": 1.9981309077180273e-05, "loss": 1.7918, "step": 149 }, { "epoch": 0.13863216266173753, "grad_norm": 2.421875, "learning_rate": 1.9980363159013422e-05, "loss": 1.8796, "step": 150 }, { "epoch": 0.13955637707948243, "grad_norm": 2.421875, "learning_rate": 1.997939391580895e-05, "loss": 1.8271, "step": 151 }, { "epoch": 0.14048059149722736, "grad_norm": 2.5, "learning_rate": 1.997840134983208e-05, "loss": 1.8558, "step": 152 }, { "epoch": 0.1414048059149723, "grad_norm": 2.421875, "learning_rate": 1.9977385463402522e-05, "loss": 1.7849, "step": 153 }, { "epoch": 0.1423290203327172, "grad_norm": 2.421875, "learning_rate": 1.9976346258894502e-05, "loss": 1.792, "step": 154 }, { "epoch": 0.14325323475046212, "grad_norm": 2.71875, "learning_rate": 1.9975283738736734e-05, "loss": 1.8889, "step": 155 }, { "epoch": 0.14417744916820702, "grad_norm": 2.734375, "learning_rate": 1.997419790541243e-05, "loss": 1.9106, "step": 156 }, { "epoch": 0.14510166358595195, "grad_norm": 2.46875, "learning_rate": 1.9973088761459286e-05, "loss": 1.8254, "step": 157 }, { "epoch": 0.14602587800369685, "grad_norm": 2.578125, "learning_rate": 1.9971956309469468e-05, "loss": 1.8388, "step": 158 }, { "epoch": 0.14695009242144177, "grad_norm": 2.484375, "learning_rate": 1.9970800552089623e-05, "loss": 1.8595, "step": 159 }, { "epoch": 0.1478743068391867, "grad_norm": 2.421875, "learning_rate": 1.996962149202087e-05, "loss": 1.8562, "step": 160 }, { "epoch": 0.1487985212569316, "grad_norm": 2.46875, "learning_rate": 1.9968419132018774e-05, "loss": 1.896, "step": 161 }, { "epoch": 0.14972273567467653, "grad_norm": 2.515625, "learning_rate": 1.9967193474893367e-05, "loss": 1.8293, "step": 162 }, { "epoch": 0.15064695009242143, "grad_norm": 2.59375, "learning_rate": 1.996594452350912e-05, "loss": 1.8038, "step": 163 }, { "epoch": 0.15157116451016636, "grad_norm": 2.453125, "learning_rate": 1.9964672280784953e-05, "loss": 1.9295, "step": 164 }, { "epoch": 0.15249537892791126, "grad_norm": 2.609375, "learning_rate": 1.9963376749694215e-05, "loss": 1.8757, "step": 165 }, { "epoch": 0.1534195933456562, "grad_norm": 2.375, "learning_rate": 1.996205793326468e-05, "loss": 1.7343, "step": 166 }, { "epoch": 0.15434380776340112, "grad_norm": 2.359375, "learning_rate": 1.996071583457855e-05, "loss": 1.8195, "step": 167 }, { "epoch": 0.15526802218114602, "grad_norm": 2.46875, "learning_rate": 1.9959350456772424e-05, "loss": 1.796, "step": 168 }, { "epoch": 0.15619223659889095, "grad_norm": 2.578125, "learning_rate": 1.9957961803037325e-05, "loss": 1.9167, "step": 169 }, { "epoch": 0.15711645101663585, "grad_norm": 2.390625, "learning_rate": 1.9956549876618668e-05, "loss": 1.8011, "step": 170 }, { "epoch": 0.15804066543438078, "grad_norm": 2.40625, "learning_rate": 1.9955114680816253e-05, "loss": 1.8689, "step": 171 }, { "epoch": 0.1589648798521257, "grad_norm": 2.625, "learning_rate": 1.9953656218984263e-05, "loss": 1.8865, "step": 172 }, { "epoch": 0.1598890942698706, "grad_norm": 2.515625, "learning_rate": 1.9952174494531266e-05, "loss": 1.8288, "step": 173 }, { "epoch": 0.16081330868761554, "grad_norm": 2.40625, "learning_rate": 1.9950669510920184e-05, "loss": 1.9267, "step": 174 }, { "epoch": 0.16173752310536044, "grad_norm": 2.375, "learning_rate": 1.9949141271668305e-05, "loss": 1.785, "step": 175 }, { "epoch": 0.16266173752310537, "grad_norm": 2.40625, "learning_rate": 1.994758978034727e-05, "loss": 1.8777, "step": 176 }, { "epoch": 0.16358595194085027, "grad_norm": 2.5, "learning_rate": 1.994601504058306e-05, "loss": 1.7895, "step": 177 }, { "epoch": 0.1645101663585952, "grad_norm": 2.46875, "learning_rate": 1.9944417056055978e-05, "loss": 1.934, "step": 178 }, { "epoch": 0.16543438077634012, "grad_norm": 2.390625, "learning_rate": 1.994279583050067e-05, "loss": 1.7927, "step": 179 }, { "epoch": 0.16635859519408502, "grad_norm": 2.4375, "learning_rate": 1.9941151367706097e-05, "loss": 1.8511, "step": 180 }, { "epoch": 0.16728280961182995, "grad_norm": 2.40625, "learning_rate": 1.9939483671515508e-05, "loss": 1.8345, "step": 181 }, { "epoch": 0.16820702402957485, "grad_norm": 2.3125, "learning_rate": 1.993779274582647e-05, "loss": 1.8126, "step": 182 }, { "epoch": 0.16913123844731978, "grad_norm": 2.484375, "learning_rate": 1.993607859459084e-05, "loss": 1.7867, "step": 183 }, { "epoch": 0.17005545286506468, "grad_norm": 2.4375, "learning_rate": 1.993434122181474e-05, "loss": 1.8409, "step": 184 }, { "epoch": 0.1709796672828096, "grad_norm": 2.3125, "learning_rate": 1.9932580631558575e-05, "loss": 1.8303, "step": 185 }, { "epoch": 0.17190388170055454, "grad_norm": 2.546875, "learning_rate": 1.993079682793701e-05, "loss": 1.7322, "step": 186 }, { "epoch": 0.17282809611829944, "grad_norm": 2.390625, "learning_rate": 1.9928989815118963e-05, "loss": 1.7711, "step": 187 }, { "epoch": 0.17375231053604437, "grad_norm": 2.46875, "learning_rate": 1.9927159597327583e-05, "loss": 1.8086, "step": 188 }, { "epoch": 0.17467652495378927, "grad_norm": 2.4375, "learning_rate": 1.9925306178840263e-05, "loss": 1.8853, "step": 189 }, { "epoch": 0.1756007393715342, "grad_norm": 2.515625, "learning_rate": 1.9923429563988613e-05, "loss": 1.8893, "step": 190 }, { "epoch": 0.17652495378927913, "grad_norm": 2.53125, "learning_rate": 1.992152975715846e-05, "loss": 1.8385, "step": 191 }, { "epoch": 0.17744916820702403, "grad_norm": 7.625, "learning_rate": 1.991960676278983e-05, "loss": 1.8494, "step": 192 }, { "epoch": 0.17837338262476896, "grad_norm": 2.578125, "learning_rate": 1.9917660585376936e-05, "loss": 1.7777, "step": 193 }, { "epoch": 0.17929759704251386, "grad_norm": 5.59375, "learning_rate": 1.991569122946818e-05, "loss": 1.8623, "step": 194 }, { "epoch": 0.18022181146025879, "grad_norm": 2.5625, "learning_rate": 1.9913698699666128e-05, "loss": 1.8877, "step": 195 }, { "epoch": 0.18114602587800369, "grad_norm": 2.375, "learning_rate": 1.9911683000627504e-05, "loss": 1.8179, "step": 196 }, { "epoch": 0.18207024029574861, "grad_norm": 2.546875, "learning_rate": 1.9909644137063186e-05, "loss": 1.8584, "step": 197 }, { "epoch": 0.18299445471349354, "grad_norm": 2.59375, "learning_rate": 1.9907582113738195e-05, "loss": 1.7721, "step": 198 }, { "epoch": 0.18391866913123844, "grad_norm": 2.40625, "learning_rate": 1.990549693547166e-05, "loss": 1.8218, "step": 199 }, { "epoch": 0.18484288354898337, "grad_norm": 2.34375, "learning_rate": 1.9903388607136846e-05, "loss": 1.8184, "step": 200 }, { "epoch": 0.18576709796672827, "grad_norm": 2.515625, "learning_rate": 1.9901257133661106e-05, "loss": 1.8708, "step": 201 }, { "epoch": 0.1866913123844732, "grad_norm": 2.578125, "learning_rate": 1.9899102520025896e-05, "loss": 1.828, "step": 202 }, { "epoch": 0.1876155268022181, "grad_norm": 2.34375, "learning_rate": 1.989692477126675e-05, "loss": 1.7782, "step": 203 }, { "epoch": 0.18853974121996303, "grad_norm": 2.453125, "learning_rate": 1.9894723892473264e-05, "loss": 1.8454, "step": 204 }, { "epoch": 0.18946395563770796, "grad_norm": 2.375, "learning_rate": 1.9892499888789097e-05, "loss": 1.8232, "step": 205 }, { "epoch": 0.19038817005545286, "grad_norm": 2.515625, "learning_rate": 1.9890252765411962e-05, "loss": 1.8581, "step": 206 }, { "epoch": 0.1913123844731978, "grad_norm": 2.65625, "learning_rate": 1.9887982527593584e-05, "loss": 1.859, "step": 207 }, { "epoch": 0.1922365988909427, "grad_norm": 2.375, "learning_rate": 1.9885689180639725e-05, "loss": 1.8113, "step": 208 }, { "epoch": 0.19316081330868762, "grad_norm": 2.71875, "learning_rate": 1.9883372729910154e-05, "loss": 1.7921, "step": 209 }, { "epoch": 0.19408502772643252, "grad_norm": 2.546875, "learning_rate": 1.9881033180818622e-05, "loss": 1.8889, "step": 210 }, { "epoch": 0.19500924214417745, "grad_norm": 2.375, "learning_rate": 1.9878670538832882e-05, "loss": 1.7741, "step": 211 }, { "epoch": 0.19593345656192238, "grad_norm": 2.640625, "learning_rate": 1.9876284809474647e-05, "loss": 1.8682, "step": 212 }, { "epoch": 0.19685767097966728, "grad_norm": 2.421875, "learning_rate": 1.9873875998319585e-05, "loss": 1.8699, "step": 213 }, { "epoch": 0.1977818853974122, "grad_norm": 2.578125, "learning_rate": 1.987144411099731e-05, "loss": 1.8366, "step": 214 }, { "epoch": 0.1987060998151571, "grad_norm": 2.4375, "learning_rate": 1.986898915319138e-05, "loss": 1.8025, "step": 215 }, { "epoch": 0.19963031423290203, "grad_norm": 2.5, "learning_rate": 1.9866511130639247e-05, "loss": 1.882, "step": 216 }, { "epoch": 0.20055452865064696, "grad_norm": 2.484375, "learning_rate": 1.9864010049132287e-05, "loss": 1.7892, "step": 217 }, { "epoch": 0.20147874306839186, "grad_norm": 2.53125, "learning_rate": 1.986148591451576e-05, "loss": 1.8574, "step": 218 }, { "epoch": 0.2024029574861368, "grad_norm": 2.40625, "learning_rate": 1.9858938732688803e-05, "loss": 1.739, "step": 219 }, { "epoch": 0.2033271719038817, "grad_norm": 2.578125, "learning_rate": 1.9856368509604414e-05, "loss": 1.8676, "step": 220 }, { "epoch": 0.20425138632162662, "grad_norm": 2.421875, "learning_rate": 1.985377525126945e-05, "loss": 1.7483, "step": 221 }, { "epoch": 0.20517560073937152, "grad_norm": 2.484375, "learning_rate": 1.985115896374459e-05, "loss": 1.8347, "step": 222 }, { "epoch": 0.20609981515711645, "grad_norm": 2.515625, "learning_rate": 1.9848519653144348e-05, "loss": 1.7917, "step": 223 }, { "epoch": 0.20702402957486138, "grad_norm": 2.453125, "learning_rate": 1.984585732563703e-05, "loss": 1.7943, "step": 224 }, { "epoch": 0.20794824399260628, "grad_norm": 2.375, "learning_rate": 1.9843171987444753e-05, "loss": 1.8078, "step": 225 }, { "epoch": 0.2088724584103512, "grad_norm": 2.65625, "learning_rate": 1.9840463644843397e-05, "loss": 1.7609, "step": 226 }, { "epoch": 0.2097966728280961, "grad_norm": 2.453125, "learning_rate": 1.9837732304162612e-05, "loss": 1.7535, "step": 227 }, { "epoch": 0.21072088724584104, "grad_norm": 2.875, "learning_rate": 1.9834977971785792e-05, "loss": 1.8484, "step": 228 }, { "epoch": 0.21164510166358594, "grad_norm": 2.328125, "learning_rate": 1.9832200654150077e-05, "loss": 1.8863, "step": 229 }, { "epoch": 0.21256931608133087, "grad_norm": 2.625, "learning_rate": 1.9829400357746307e-05, "loss": 1.8598, "step": 230 }, { "epoch": 0.2134935304990758, "grad_norm": 2.484375, "learning_rate": 1.9826577089119038e-05, "loss": 1.8361, "step": 231 }, { "epoch": 0.2144177449168207, "grad_norm": 2.390625, "learning_rate": 1.982373085486651e-05, "loss": 1.7892, "step": 232 }, { "epoch": 0.21534195933456562, "grad_norm": 2.375, "learning_rate": 1.9820861661640646e-05, "loss": 1.8467, "step": 233 }, { "epoch": 0.21626617375231053, "grad_norm": 2.5, "learning_rate": 1.981796951614701e-05, "loss": 1.8468, "step": 234 }, { "epoch": 0.21719038817005545, "grad_norm": 2.5, "learning_rate": 1.9815054425144817e-05, "loss": 1.8043, "step": 235 }, { "epoch": 0.21811460258780038, "grad_norm": 5.71875, "learning_rate": 1.9812116395446906e-05, "loss": 1.8287, "step": 236 }, { "epoch": 0.21903881700554528, "grad_norm": 2.328125, "learning_rate": 1.9809155433919726e-05, "loss": 1.7701, "step": 237 }, { "epoch": 0.2199630314232902, "grad_norm": 2.484375, "learning_rate": 1.9806171547483317e-05, "loss": 1.8887, "step": 238 }, { "epoch": 0.2208872458410351, "grad_norm": 2.765625, "learning_rate": 1.9803164743111304e-05, "loss": 1.7298, "step": 239 }, { "epoch": 0.22181146025878004, "grad_norm": 2.390625, "learning_rate": 1.9800135027830866e-05, "loss": 1.8693, "step": 240 }, { "epoch": 0.22273567467652494, "grad_norm": 2.65625, "learning_rate": 1.979708240872273e-05, "loss": 1.9084, "step": 241 }, { "epoch": 0.22365988909426987, "grad_norm": 3.3125, "learning_rate": 1.979400689292115e-05, "loss": 1.8188, "step": 242 }, { "epoch": 0.2245841035120148, "grad_norm": 2.71875, "learning_rate": 1.9790908487613895e-05, "loss": 1.7873, "step": 243 }, { "epoch": 0.2255083179297597, "grad_norm": 15.5, "learning_rate": 1.9787787200042224e-05, "loss": 1.8445, "step": 244 }, { "epoch": 0.22643253234750463, "grad_norm": 3.375, "learning_rate": 1.9784643037500877e-05, "loss": 1.8383, "step": 245 }, { "epoch": 0.22735674676524953, "grad_norm": 6.84375, "learning_rate": 1.9781476007338058e-05, "loss": 1.834, "step": 246 }, { "epoch": 0.22828096118299446, "grad_norm": 3.15625, "learning_rate": 1.9778286116955407e-05, "loss": 1.7983, "step": 247 }, { "epoch": 0.22920517560073936, "grad_norm": 3.125, "learning_rate": 1.9775073373807995e-05, "loss": 1.8401, "step": 248 }, { "epoch": 0.2301293900184843, "grad_norm": 2.890625, "learning_rate": 1.9771837785404308e-05, "loss": 1.8153, "step": 249 }, { "epoch": 0.23105360443622922, "grad_norm": 29.75, "learning_rate": 1.9768579359306204e-05, "loss": 1.8784, "step": 250 }, { "epoch": 0.23197781885397412, "grad_norm": 2.484375, "learning_rate": 1.9765298103128945e-05, "loss": 1.8219, "step": 251 }, { "epoch": 0.23290203327171904, "grad_norm": 2.484375, "learning_rate": 1.9761994024541118e-05, "loss": 1.7413, "step": 252 }, { "epoch": 0.23382624768946395, "grad_norm": 2.40625, "learning_rate": 1.975866713126467e-05, "loss": 1.8455, "step": 253 }, { "epoch": 0.23475046210720887, "grad_norm": 2.515625, "learning_rate": 1.9755317431074857e-05, "loss": 1.7784, "step": 254 }, { "epoch": 0.2356746765249538, "grad_norm": 2.515625, "learning_rate": 1.975194493180024e-05, "loss": 1.836, "step": 255 }, { "epoch": 0.2365988909426987, "grad_norm": 2.40625, "learning_rate": 1.9748549641322668e-05, "loss": 1.7377, "step": 256 }, { "epoch": 0.23752310536044363, "grad_norm": 2.546875, "learning_rate": 1.974513156757725e-05, "loss": 1.853, "step": 257 }, { "epoch": 0.23844731977818853, "grad_norm": 2.515625, "learning_rate": 1.974169071855233e-05, "loss": 1.7921, "step": 258 }, { "epoch": 0.23937153419593346, "grad_norm": 2.375, "learning_rate": 1.973822710228951e-05, "loss": 1.8444, "step": 259 }, { "epoch": 0.24029574861367836, "grad_norm": 2.4375, "learning_rate": 1.9734740726883572e-05, "loss": 1.8632, "step": 260 }, { "epoch": 0.2412199630314233, "grad_norm": 2.5, "learning_rate": 1.9731231600482507e-05, "loss": 1.7819, "step": 261 }, { "epoch": 0.24214417744916822, "grad_norm": 2.484375, "learning_rate": 1.9727699731287465e-05, "loss": 1.7956, "step": 262 }, { "epoch": 0.24306839186691312, "grad_norm": 2.53125, "learning_rate": 1.9724145127552756e-05, "loss": 1.8289, "step": 263 }, { "epoch": 0.24399260628465805, "grad_norm": 2.40625, "learning_rate": 1.9720567797585815e-05, "loss": 1.7983, "step": 264 }, { "epoch": 0.24491682070240295, "grad_norm": 2.296875, "learning_rate": 1.9716967749747206e-05, "loss": 1.8215, "step": 265 }, { "epoch": 0.24584103512014788, "grad_norm": 2.421875, "learning_rate": 1.9713344992450564e-05, "loss": 1.871, "step": 266 }, { "epoch": 0.24676524953789278, "grad_norm": 2.4375, "learning_rate": 1.9709699534162615e-05, "loss": 1.8487, "step": 267 }, { "epoch": 0.2476894639556377, "grad_norm": 2.421875, "learning_rate": 1.9706031383403133e-05, "loss": 1.8694, "step": 268 }, { "epoch": 0.24861367837338263, "grad_norm": 2.40625, "learning_rate": 1.970234054874493e-05, "loss": 1.8236, "step": 269 }, { "epoch": 0.24953789279112754, "grad_norm": 2.6875, "learning_rate": 1.969862703881383e-05, "loss": 1.7475, "step": 270 }, { "epoch": 0.25046210720887246, "grad_norm": 2.375, "learning_rate": 1.9694890862288647e-05, "loss": 1.7394, "step": 271 }, { "epoch": 0.2513863216266174, "grad_norm": 2.5, "learning_rate": 1.969113202790118e-05, "loss": 1.8577, "step": 272 }, { "epoch": 0.25231053604436227, "grad_norm": 2.703125, "learning_rate": 1.9687350544436167e-05, "loss": 1.8408, "step": 273 }, { "epoch": 0.2532347504621072, "grad_norm": 2.5625, "learning_rate": 1.9683546420731292e-05, "loss": 1.813, "step": 274 }, { "epoch": 0.2541589648798521, "grad_norm": 2.515625, "learning_rate": 1.9679719665677148e-05, "loss": 1.7782, "step": 275 }, { "epoch": 0.25508317929759705, "grad_norm": 2.75, "learning_rate": 1.967587028821721e-05, "loss": 1.8207, "step": 276 }, { "epoch": 0.256007393715342, "grad_norm": 2.46875, "learning_rate": 1.967199829734784e-05, "loss": 1.8807, "step": 277 }, { "epoch": 0.25693160813308685, "grad_norm": 2.421875, "learning_rate": 1.9668103702118238e-05, "loss": 1.8008, "step": 278 }, { "epoch": 0.2578558225508318, "grad_norm": 2.359375, "learning_rate": 1.9664186511630433e-05, "loss": 1.8277, "step": 279 }, { "epoch": 0.2587800369685767, "grad_norm": 2.765625, "learning_rate": 1.966024673503927e-05, "loss": 1.8582, "step": 280 }, { "epoch": 0.25970425138632164, "grad_norm": 2.359375, "learning_rate": 1.9656284381552364e-05, "loss": 1.729, "step": 281 }, { "epoch": 0.26062846580406657, "grad_norm": 2.53125, "learning_rate": 1.965229946043011e-05, "loss": 1.8339, "step": 282 }, { "epoch": 0.26155268022181144, "grad_norm": 2.453125, "learning_rate": 1.964829198098564e-05, "loss": 1.8304, "step": 283 }, { "epoch": 0.26247689463955637, "grad_norm": 2.59375, "learning_rate": 1.9644261952584804e-05, "loss": 1.8421, "step": 284 }, { "epoch": 0.2634011090573013, "grad_norm": 2.828125, "learning_rate": 1.9640209384646155e-05, "loss": 1.909, "step": 285 }, { "epoch": 0.2643253234750462, "grad_norm": 2.34375, "learning_rate": 1.963613428664092e-05, "loss": 1.8871, "step": 286 }, { "epoch": 0.26524953789279115, "grad_norm": 2.5625, "learning_rate": 1.9632036668092988e-05, "loss": 1.8236, "step": 287 }, { "epoch": 0.266173752310536, "grad_norm": 2.984375, "learning_rate": 1.9627916538578865e-05, "loss": 1.8041, "step": 288 }, { "epoch": 0.26709796672828096, "grad_norm": 2.375, "learning_rate": 1.9623773907727682e-05, "loss": 1.7939, "step": 289 }, { "epoch": 0.2680221811460259, "grad_norm": 2.4375, "learning_rate": 1.9619608785221155e-05, "loss": 1.8597, "step": 290 }, { "epoch": 0.2689463955637708, "grad_norm": 2.453125, "learning_rate": 1.9615421180793564e-05, "loss": 1.8199, "step": 291 }, { "epoch": 0.2698706099815157, "grad_norm": 2.5625, "learning_rate": 1.9611211104231724e-05, "loss": 1.8084, "step": 292 }, { "epoch": 0.2707948243992606, "grad_norm": 2.375, "learning_rate": 1.960697856537498e-05, "loss": 1.8826, "step": 293 }, { "epoch": 0.27171903881700554, "grad_norm": 2.375, "learning_rate": 1.960272357411517e-05, "loss": 1.805, "step": 294 }, { "epoch": 0.27264325323475047, "grad_norm": 2.546875, "learning_rate": 1.9598446140396607e-05, "loss": 1.7651, "step": 295 }, { "epoch": 0.2735674676524954, "grad_norm": 2.671875, "learning_rate": 1.959414627421604e-05, "loss": 1.7758, "step": 296 }, { "epoch": 0.2744916820702403, "grad_norm": 2.53125, "learning_rate": 1.958982398562267e-05, "loss": 1.8417, "step": 297 }, { "epoch": 0.2754158964879852, "grad_norm": 2.390625, "learning_rate": 1.9585479284718077e-05, "loss": 1.7954, "step": 298 }, { "epoch": 0.27634011090573013, "grad_norm": 2.34375, "learning_rate": 1.9581112181656242e-05, "loss": 1.8072, "step": 299 }, { "epoch": 0.27726432532347506, "grad_norm": 2.296875, "learning_rate": 1.9576722686643483e-05, "loss": 1.7432, "step": 300 }, { "epoch": 0.27818853974122, "grad_norm": 2.5, "learning_rate": 1.957231080993846e-05, "loss": 1.8771, "step": 301 }, { "epoch": 0.27911275415896486, "grad_norm": 2.359375, "learning_rate": 1.9567876561852147e-05, "loss": 1.8472, "step": 302 }, { "epoch": 0.2800369685767098, "grad_norm": 2.3125, "learning_rate": 1.956341995274778e-05, "loss": 1.8367, "step": 303 }, { "epoch": 0.2809611829944547, "grad_norm": 2.375, "learning_rate": 1.9558940993040885e-05, "loss": 1.7979, "step": 304 }, { "epoch": 0.28188539741219965, "grad_norm": 2.375, "learning_rate": 1.9554439693199194e-05, "loss": 1.8235, "step": 305 }, { "epoch": 0.2828096118299446, "grad_norm": 2.4375, "learning_rate": 1.9549916063742673e-05, "loss": 1.8021, "step": 306 }, { "epoch": 0.28373382624768945, "grad_norm": 2.359375, "learning_rate": 1.9545370115243463e-05, "loss": 1.8305, "step": 307 }, { "epoch": 0.2846580406654344, "grad_norm": 2.734375, "learning_rate": 1.9540801858325865e-05, "loss": 1.9333, "step": 308 }, { "epoch": 0.2855822550831793, "grad_norm": 3.875, "learning_rate": 1.9536211303666324e-05, "loss": 1.8398, "step": 309 }, { "epoch": 0.28650646950092423, "grad_norm": 2.875, "learning_rate": 1.953159846199339e-05, "loss": 1.8228, "step": 310 }, { "epoch": 0.2874306839186691, "grad_norm": 2.484375, "learning_rate": 1.9526963344087714e-05, "loss": 1.8473, "step": 311 }, { "epoch": 0.28835489833641403, "grad_norm": 2.515625, "learning_rate": 1.9522305960781986e-05, "loss": 1.835, "step": 312 }, { "epoch": 0.28927911275415896, "grad_norm": 2.359375, "learning_rate": 1.9517626322960946e-05, "loss": 1.7645, "step": 313 }, { "epoch": 0.2902033271719039, "grad_norm": 2.328125, "learning_rate": 1.9512924441561348e-05, "loss": 1.7436, "step": 314 }, { "epoch": 0.2911275415896488, "grad_norm": 2.40625, "learning_rate": 1.950820032757192e-05, "loss": 1.7826, "step": 315 }, { "epoch": 0.2920517560073937, "grad_norm": 2.40625, "learning_rate": 1.950345399203337e-05, "loss": 1.7733, "step": 316 }, { "epoch": 0.2929759704251386, "grad_norm": 2.453125, "learning_rate": 1.9498685446038306e-05, "loss": 1.7706, "step": 317 }, { "epoch": 0.29390018484288355, "grad_norm": 2.40625, "learning_rate": 1.949389470073128e-05, "loss": 1.8443, "step": 318 }, { "epoch": 0.2948243992606285, "grad_norm": 2.46875, "learning_rate": 1.9489081767308696e-05, "loss": 1.8424, "step": 319 }, { "epoch": 0.2957486136783734, "grad_norm": 2.296875, "learning_rate": 1.948424665701884e-05, "loss": 1.7751, "step": 320 }, { "epoch": 0.2966728280961183, "grad_norm": 2.390625, "learning_rate": 1.947938938116181e-05, "loss": 1.8094, "step": 321 }, { "epoch": 0.2975970425138632, "grad_norm": 2.375, "learning_rate": 1.9474509951089508e-05, "loss": 1.8415, "step": 322 }, { "epoch": 0.29852125693160814, "grad_norm": 2.453125, "learning_rate": 1.9469608378205624e-05, "loss": 1.7953, "step": 323 }, { "epoch": 0.29944547134935307, "grad_norm": 2.390625, "learning_rate": 1.9464684673965583e-05, "loss": 1.8729, "step": 324 }, { "epoch": 0.300369685767098, "grad_norm": 2.3125, "learning_rate": 1.9459738849876545e-05, "loss": 1.7748, "step": 325 }, { "epoch": 0.30129390018484287, "grad_norm": 2.484375, "learning_rate": 1.9454770917497357e-05, "loss": 1.828, "step": 326 }, { "epoch": 0.3022181146025878, "grad_norm": 2.453125, "learning_rate": 1.944978088843854e-05, "loss": 1.8357, "step": 327 }, { "epoch": 0.3031423290203327, "grad_norm": 2.390625, "learning_rate": 1.944476877436226e-05, "loss": 1.7777, "step": 328 }, { "epoch": 0.30406654343807765, "grad_norm": 2.3125, "learning_rate": 1.9439734586982292e-05, "loss": 1.8503, "step": 329 }, { "epoch": 0.3049907578558225, "grad_norm": 2.359375, "learning_rate": 1.9434678338063997e-05, "loss": 1.8526, "step": 330 }, { "epoch": 0.30591497227356745, "grad_norm": 2.359375, "learning_rate": 1.9429600039424306e-05, "loss": 1.804, "step": 331 }, { "epoch": 0.3068391866913124, "grad_norm": 2.46875, "learning_rate": 1.942449970293167e-05, "loss": 1.7715, "step": 332 }, { "epoch": 0.3077634011090573, "grad_norm": 2.484375, "learning_rate": 1.9419377340506055e-05, "loss": 1.7458, "step": 333 }, { "epoch": 0.30868761552680224, "grad_norm": 2.359375, "learning_rate": 1.9414232964118893e-05, "loss": 1.8572, "step": 334 }, { "epoch": 0.3096118299445471, "grad_norm": 2.40625, "learning_rate": 1.9409066585793072e-05, "loss": 1.8761, "step": 335 }, { "epoch": 0.31053604436229204, "grad_norm": 2.390625, "learning_rate": 1.9403878217602905e-05, "loss": 1.7616, "step": 336 }, { "epoch": 0.31146025878003697, "grad_norm": 2.328125, "learning_rate": 1.9398667871674082e-05, "loss": 1.8547, "step": 337 }, { "epoch": 0.3123844731977819, "grad_norm": 2.34375, "learning_rate": 1.9393435560183676e-05, "loss": 1.8155, "step": 338 }, { "epoch": 0.3133086876155268, "grad_norm": 2.53125, "learning_rate": 1.9388181295360077e-05, "loss": 1.8441, "step": 339 }, { "epoch": 0.3142329020332717, "grad_norm": 2.421875, "learning_rate": 1.9382905089482994e-05, "loss": 1.7487, "step": 340 }, { "epoch": 0.31515711645101663, "grad_norm": 2.453125, "learning_rate": 1.937760695488342e-05, "loss": 1.8294, "step": 341 }, { "epoch": 0.31608133086876156, "grad_norm": 2.484375, "learning_rate": 1.9372286903943572e-05, "loss": 1.8724, "step": 342 }, { "epoch": 0.3170055452865065, "grad_norm": 2.640625, "learning_rate": 1.9366944949096917e-05, "loss": 1.8414, "step": 343 }, { "epoch": 0.3179297597042514, "grad_norm": 2.40625, "learning_rate": 1.9361581102828095e-05, "loss": 1.8197, "step": 344 }, { "epoch": 0.3188539741219963, "grad_norm": 2.375, "learning_rate": 1.935619537767292e-05, "loss": 1.7762, "step": 345 }, { "epoch": 0.3197781885397412, "grad_norm": 2.40625, "learning_rate": 1.9350787786218325e-05, "loss": 1.8243, "step": 346 }, { "epoch": 0.32070240295748614, "grad_norm": 2.375, "learning_rate": 1.9345358341102358e-05, "loss": 1.8216, "step": 347 }, { "epoch": 0.32162661737523107, "grad_norm": 2.328125, "learning_rate": 1.933990705501414e-05, "loss": 1.8647, "step": 348 }, { "epoch": 0.32255083179297594, "grad_norm": 2.4375, "learning_rate": 1.933443394069383e-05, "loss": 1.7994, "step": 349 }, { "epoch": 0.3234750462107209, "grad_norm": 2.5625, "learning_rate": 1.9328939010932607e-05, "loss": 1.7937, "step": 350 }, { "epoch": 0.3243992606284658, "grad_norm": 2.421875, "learning_rate": 1.9323422278572635e-05, "loss": 1.8238, "step": 351 }, { "epoch": 0.32532347504621073, "grad_norm": 2.34375, "learning_rate": 1.9317883756507026e-05, "loss": 1.7627, "step": 352 }, { "epoch": 0.32624768946395566, "grad_norm": 2.609375, "learning_rate": 1.9312323457679827e-05, "loss": 1.8865, "step": 353 }, { "epoch": 0.32717190388170053, "grad_norm": 2.546875, "learning_rate": 1.9306741395085977e-05, "loss": 1.7641, "step": 354 }, { "epoch": 0.32809611829944546, "grad_norm": 2.34375, "learning_rate": 1.9301137581771266e-05, "loss": 1.763, "step": 355 }, { "epoch": 0.3290203327171904, "grad_norm": 2.4375, "learning_rate": 1.9295512030832336e-05, "loss": 1.7827, "step": 356 }, { "epoch": 0.3299445471349353, "grad_norm": 2.6875, "learning_rate": 1.9289864755416615e-05, "loss": 1.6917, "step": 357 }, { "epoch": 0.33086876155268025, "grad_norm": 2.578125, "learning_rate": 1.928419576872232e-05, "loss": 1.7584, "step": 358 }, { "epoch": 0.3317929759704251, "grad_norm": 2.625, "learning_rate": 1.9278505083998393e-05, "loss": 1.8805, "step": 359 }, { "epoch": 0.33271719038817005, "grad_norm": 3.1875, "learning_rate": 1.9272792714544493e-05, "loss": 1.8331, "step": 360 }, { "epoch": 0.333641404805915, "grad_norm": 2.765625, "learning_rate": 1.9267058673710967e-05, "loss": 1.8396, "step": 361 }, { "epoch": 0.3345656192236599, "grad_norm": 2.859375, "learning_rate": 1.9261302974898798e-05, "loss": 1.8088, "step": 362 }, { "epoch": 0.33548983364140483, "grad_norm": 2.296875, "learning_rate": 1.925552563155959e-05, "loss": 1.843, "step": 363 }, { "epoch": 0.3364140480591497, "grad_norm": 2.625, "learning_rate": 1.9249726657195534e-05, "loss": 1.8551, "step": 364 }, { "epoch": 0.33733826247689463, "grad_norm": 2.953125, "learning_rate": 1.924390606535937e-05, "loss": 1.846, "step": 365 }, { "epoch": 0.33826247689463956, "grad_norm": 3.0625, "learning_rate": 1.923806386965436e-05, "loss": 1.8198, "step": 366 }, { "epoch": 0.3391866913123845, "grad_norm": 2.34375, "learning_rate": 1.9232200083734267e-05, "loss": 1.7839, "step": 367 }, { "epoch": 0.34011090573012936, "grad_norm": 2.65625, "learning_rate": 1.92263147213033e-05, "loss": 1.9284, "step": 368 }, { "epoch": 0.3410351201478743, "grad_norm": 2.578125, "learning_rate": 1.9220407796116098e-05, "loss": 1.8056, "step": 369 }, { "epoch": 0.3419593345656192, "grad_norm": 2.78125, "learning_rate": 1.9214479321977697e-05, "loss": 1.8224, "step": 370 }, { "epoch": 0.34288354898336415, "grad_norm": 2.4375, "learning_rate": 1.920852931274349e-05, "loss": 1.8813, "step": 371 }, { "epoch": 0.3438077634011091, "grad_norm": 2.609375, "learning_rate": 1.9202557782319207e-05, "loss": 1.8575, "step": 372 }, { "epoch": 0.34473197781885395, "grad_norm": 2.640625, "learning_rate": 1.9196564744660867e-05, "loss": 1.7455, "step": 373 }, { "epoch": 0.3456561922365989, "grad_norm": 2.421875, "learning_rate": 1.9190550213774757e-05, "loss": 1.832, "step": 374 }, { "epoch": 0.3465804066543438, "grad_norm": 2.3125, "learning_rate": 1.9184514203717394e-05, "loss": 1.8495, "step": 375 }, { "epoch": 0.34750462107208874, "grad_norm": 2.34375, "learning_rate": 1.9178456728595497e-05, "loss": 1.7797, "step": 376 }, { "epoch": 0.34842883548983367, "grad_norm": 2.375, "learning_rate": 1.9172377802565957e-05, "loss": 1.8169, "step": 377 }, { "epoch": 0.34935304990757854, "grad_norm": 2.375, "learning_rate": 1.9166277439835773e-05, "loss": 1.795, "step": 378 }, { "epoch": 0.35027726432532347, "grad_norm": 2.390625, "learning_rate": 1.9160155654662075e-05, "loss": 1.7354, "step": 379 }, { "epoch": 0.3512014787430684, "grad_norm": 2.296875, "learning_rate": 1.9154012461352035e-05, "loss": 1.6805, "step": 380 }, { "epoch": 0.3521256931608133, "grad_norm": 2.609375, "learning_rate": 1.914784787426288e-05, "loss": 1.7878, "step": 381 }, { "epoch": 0.35304990757855825, "grad_norm": 2.640625, "learning_rate": 1.9141661907801808e-05, "loss": 1.8455, "step": 382 }, { "epoch": 0.3539741219963031, "grad_norm": 2.375, "learning_rate": 1.913545457642601e-05, "loss": 1.7985, "step": 383 }, { "epoch": 0.35489833641404805, "grad_norm": 2.296875, "learning_rate": 1.9129225894642594e-05, "loss": 1.8432, "step": 384 }, { "epoch": 0.355822550831793, "grad_norm": 2.71875, "learning_rate": 1.9122975877008567e-05, "loss": 1.7886, "step": 385 }, { "epoch": 0.3567467652495379, "grad_norm": 3.015625, "learning_rate": 1.9116704538130803e-05, "loss": 1.8201, "step": 386 }, { "epoch": 0.3576709796672828, "grad_norm": 2.28125, "learning_rate": 1.9110411892666006e-05, "loss": 1.8003, "step": 387 }, { "epoch": 0.3585951940850277, "grad_norm": 2.609375, "learning_rate": 1.9104097955320672e-05, "loss": 1.9047, "step": 388 }, { "epoch": 0.35951940850277264, "grad_norm": 2.609375, "learning_rate": 1.9097762740851063e-05, "loss": 1.8118, "step": 389 }, { "epoch": 0.36044362292051757, "grad_norm": 2.671875, "learning_rate": 1.9091406264063163e-05, "loss": 1.8673, "step": 390 }, { "epoch": 0.3613678373382625, "grad_norm": 2.390625, "learning_rate": 1.9085028539812648e-05, "loss": 1.8354, "step": 391 }, { "epoch": 0.36229205175600737, "grad_norm": 2.484375, "learning_rate": 1.907862958300486e-05, "loss": 1.7845, "step": 392 }, { "epoch": 0.3632162661737523, "grad_norm": 2.5625, "learning_rate": 1.907220940859475e-05, "loss": 1.85, "step": 393 }, { "epoch": 0.36414048059149723, "grad_norm": 2.4375, "learning_rate": 1.9065768031586864e-05, "loss": 1.8279, "step": 394 }, { "epoch": 0.36506469500924216, "grad_norm": 2.28125, "learning_rate": 1.90593054670353e-05, "loss": 1.7901, "step": 395 }, { "epoch": 0.3659889094269871, "grad_norm": 2.515625, "learning_rate": 1.9052821730043676e-05, "loss": 1.7901, "step": 396 }, { "epoch": 0.36691312384473196, "grad_norm": 2.4375, "learning_rate": 1.9046316835765083e-05, "loss": 1.8505, "step": 397 }, { "epoch": 0.3678373382624769, "grad_norm": 2.546875, "learning_rate": 1.9039790799402072e-05, "loss": 1.8252, "step": 398 }, { "epoch": 0.3687615526802218, "grad_norm": 2.328125, "learning_rate": 1.903324363620659e-05, "loss": 1.7952, "step": 399 }, { "epoch": 0.36968576709796674, "grad_norm": 2.671875, "learning_rate": 1.902667536147997e-05, "loss": 1.8418, "step": 400 }, { "epoch": 0.3706099815157117, "grad_norm": 2.46875, "learning_rate": 1.9020085990572882e-05, "loss": 1.7914, "step": 401 }, { "epoch": 0.37153419593345655, "grad_norm": 2.40625, "learning_rate": 1.9013475538885297e-05, "loss": 1.7756, "step": 402 }, { "epoch": 0.3724584103512015, "grad_norm": 2.3125, "learning_rate": 1.900684402186646e-05, "loss": 1.7733, "step": 403 }, { "epoch": 0.3733826247689464, "grad_norm": 2.5, "learning_rate": 1.900019145501484e-05, "loss": 1.7646, "step": 404 }, { "epoch": 0.37430683918669133, "grad_norm": 2.296875, "learning_rate": 1.8993517853878105e-05, "loss": 1.7381, "step": 405 }, { "epoch": 0.3752310536044362, "grad_norm": 2.328125, "learning_rate": 1.898682323405308e-05, "loss": 1.7489, "step": 406 }, { "epoch": 0.37615526802218113, "grad_norm": 2.390625, "learning_rate": 1.8980107611185724e-05, "loss": 1.8204, "step": 407 }, { "epoch": 0.37707948243992606, "grad_norm": 2.375, "learning_rate": 1.897337100097106e-05, "loss": 1.781, "step": 408 }, { "epoch": 0.378003696857671, "grad_norm": 2.46875, "learning_rate": 1.896661341915318e-05, "loss": 1.8209, "step": 409 }, { "epoch": 0.3789279112754159, "grad_norm": 2.375, "learning_rate": 1.8959834881525183e-05, "loss": 1.8092, "step": 410 }, { "epoch": 0.3798521256931608, "grad_norm": 2.515625, "learning_rate": 1.895303540392914e-05, "loss": 1.8058, "step": 411 }, { "epoch": 0.3807763401109057, "grad_norm": 2.484375, "learning_rate": 1.894621500225606e-05, "loss": 1.8722, "step": 412 }, { "epoch": 0.38170055452865065, "grad_norm": 2.28125, "learning_rate": 1.8939373692445863e-05, "loss": 1.7865, "step": 413 }, { "epoch": 0.3826247689463956, "grad_norm": 2.703125, "learning_rate": 1.893251149048732e-05, "loss": 1.8205, "step": 414 }, { "epoch": 0.3835489833641405, "grad_norm": 2.46875, "learning_rate": 1.8925628412418042e-05, "loss": 1.8676, "step": 415 }, { "epoch": 0.3844731977818854, "grad_norm": 2.359375, "learning_rate": 1.8918724474324418e-05, "loss": 1.748, "step": 416 }, { "epoch": 0.3853974121996303, "grad_norm": 2.3125, "learning_rate": 1.8911799692341598e-05, "loss": 1.7704, "step": 417 }, { "epoch": 0.38632162661737524, "grad_norm": 2.5, "learning_rate": 1.8904854082653438e-05, "loss": 1.8642, "step": 418 }, { "epoch": 0.38724584103512016, "grad_norm": 2.640625, "learning_rate": 1.8897887661492476e-05, "loss": 1.7762, "step": 419 }, { "epoch": 0.38817005545286504, "grad_norm": 2.5625, "learning_rate": 1.889090044513989e-05, "loss": 1.8046, "step": 420 }, { "epoch": 0.38909426987060997, "grad_norm": 2.546875, "learning_rate": 1.888389244992545e-05, "loss": 1.7127, "step": 421 }, { "epoch": 0.3900184842883549, "grad_norm": 2.546875, "learning_rate": 1.8876863692227492e-05, "loss": 1.8352, "step": 422 }, { "epoch": 0.3909426987060998, "grad_norm": 2.6875, "learning_rate": 1.8869814188472883e-05, "loss": 1.8322, "step": 423 }, { "epoch": 0.39186691312384475, "grad_norm": 2.28125, "learning_rate": 1.8862743955136966e-05, "loss": 1.7639, "step": 424 }, { "epoch": 0.3927911275415896, "grad_norm": 2.34375, "learning_rate": 1.8855653008743533e-05, "loss": 1.759, "step": 425 }, { "epoch": 0.39371534195933455, "grad_norm": 2.453125, "learning_rate": 1.884854136586479e-05, "loss": 1.7579, "step": 426 }, { "epoch": 0.3946395563770795, "grad_norm": 2.359375, "learning_rate": 1.8841409043121306e-05, "loss": 1.8499, "step": 427 }, { "epoch": 0.3955637707948244, "grad_norm": 2.390625, "learning_rate": 1.8834256057181978e-05, "loss": 1.7694, "step": 428 }, { "epoch": 0.39648798521256934, "grad_norm": 2.40625, "learning_rate": 1.882708242476401e-05, "loss": 1.8417, "step": 429 }, { "epoch": 0.3974121996303142, "grad_norm": 2.375, "learning_rate": 1.881988816263284e-05, "loss": 1.7308, "step": 430 }, { "epoch": 0.39833641404805914, "grad_norm": 2.46875, "learning_rate": 1.8812673287602136e-05, "loss": 1.7876, "step": 431 }, { "epoch": 0.39926062846580407, "grad_norm": 2.40625, "learning_rate": 1.8805437816533723e-05, "loss": 1.8453, "step": 432 }, { "epoch": 0.400184842883549, "grad_norm": 2.40625, "learning_rate": 1.879818176633758e-05, "loss": 1.8424, "step": 433 }, { "epoch": 0.4011090573012939, "grad_norm": 2.359375, "learning_rate": 1.879090515397176e-05, "loss": 1.7315, "step": 434 }, { "epoch": 0.4020332717190388, "grad_norm": 2.40625, "learning_rate": 1.8783607996442393e-05, "loss": 1.8596, "step": 435 }, { "epoch": 0.4029574861367837, "grad_norm": 2.34375, "learning_rate": 1.8776290310803616e-05, "loss": 1.8376, "step": 436 }, { "epoch": 0.40388170055452866, "grad_norm": 2.5, "learning_rate": 1.8768952114157535e-05, "loss": 1.8336, "step": 437 }, { "epoch": 0.4048059149722736, "grad_norm": 2.453125, "learning_rate": 1.8761593423654202e-05, "loss": 1.8435, "step": 438 }, { "epoch": 0.40573012939001846, "grad_norm": 2.28125, "learning_rate": 1.8754214256491564e-05, "loss": 1.729, "step": 439 }, { "epoch": 0.4066543438077634, "grad_norm": 2.3125, "learning_rate": 1.8746814629915414e-05, "loss": 1.7737, "step": 440 }, { "epoch": 0.4075785582255083, "grad_norm": 2.421875, "learning_rate": 1.8739394561219378e-05, "loss": 1.751, "step": 441 }, { "epoch": 0.40850277264325324, "grad_norm": 2.375, "learning_rate": 1.8731954067744835e-05, "loss": 1.8782, "step": 442 }, { "epoch": 0.40942698706099817, "grad_norm": 2.4375, "learning_rate": 1.8724493166880924e-05, "loss": 1.7271, "step": 443 }, { "epoch": 0.41035120147874304, "grad_norm": 2.421875, "learning_rate": 1.8717011876064453e-05, "loss": 1.8491, "step": 444 }, { "epoch": 0.411275415896488, "grad_norm": 2.40625, "learning_rate": 1.8709510212779906e-05, "loss": 1.7633, "step": 445 }, { "epoch": 0.4121996303142329, "grad_norm": 2.40625, "learning_rate": 1.870198819455936e-05, "loss": 1.7974, "step": 446 }, { "epoch": 0.41312384473197783, "grad_norm": 2.421875, "learning_rate": 1.8694445838982473e-05, "loss": 1.7966, "step": 447 }, { "epoch": 0.41404805914972276, "grad_norm": 2.328125, "learning_rate": 1.8686883163676434e-05, "loss": 1.752, "step": 448 }, { "epoch": 0.41497227356746763, "grad_norm": 2.515625, "learning_rate": 1.8679300186315918e-05, "loss": 1.8489, "step": 449 }, { "epoch": 0.41589648798521256, "grad_norm": 2.4375, "learning_rate": 1.867169692462305e-05, "loss": 1.8105, "step": 450 }, { "epoch": 0.4168207024029575, "grad_norm": 2.4375, "learning_rate": 1.8664073396367363e-05, "loss": 1.7778, "step": 451 }, { "epoch": 0.4177449168207024, "grad_norm": 2.3125, "learning_rate": 1.8656429619365744e-05, "loss": 1.6695, "step": 452 }, { "epoch": 0.41866913123844735, "grad_norm": 2.421875, "learning_rate": 1.8648765611482417e-05, "loss": 1.828, "step": 453 }, { "epoch": 0.4195933456561922, "grad_norm": 2.328125, "learning_rate": 1.864108139062888e-05, "loss": 1.8154, "step": 454 }, { "epoch": 0.42051756007393715, "grad_norm": 2.40625, "learning_rate": 1.8633376974763874e-05, "loss": 1.8107, "step": 455 }, { "epoch": 0.4214417744916821, "grad_norm": 2.3125, "learning_rate": 1.8625652381893335e-05, "loss": 1.8369, "step": 456 }, { "epoch": 0.422365988909427, "grad_norm": 2.4375, "learning_rate": 1.8617907630070353e-05, "loss": 1.8016, "step": 457 }, { "epoch": 0.4232902033271719, "grad_norm": 2.328125, "learning_rate": 1.861014273739514e-05, "loss": 1.7897, "step": 458 }, { "epoch": 0.4242144177449168, "grad_norm": 2.4375, "learning_rate": 1.8602357722014966e-05, "loss": 1.7401, "step": 459 }, { "epoch": 0.42513863216266173, "grad_norm": 2.46875, "learning_rate": 1.859455260212414e-05, "loss": 1.7512, "step": 460 }, { "epoch": 0.42606284658040666, "grad_norm": 2.4375, "learning_rate": 1.8586727395963952e-05, "loss": 1.8365, "step": 461 }, { "epoch": 0.4269870609981516, "grad_norm": 2.390625, "learning_rate": 1.8578882121822643e-05, "loss": 1.817, "step": 462 }, { "epoch": 0.42791127541589646, "grad_norm": 2.515625, "learning_rate": 1.8571016798035343e-05, "loss": 1.714, "step": 463 }, { "epoch": 0.4288354898336414, "grad_norm": 2.5, "learning_rate": 1.8563131442984045e-05, "loss": 1.7522, "step": 464 }, { "epoch": 0.4297597042513863, "grad_norm": 2.328125, "learning_rate": 1.8555226075097558e-05, "loss": 1.7814, "step": 465 }, { "epoch": 0.43068391866913125, "grad_norm": 2.359375, "learning_rate": 1.8547300712851467e-05, "loss": 1.8426, "step": 466 }, { "epoch": 0.4316081330868762, "grad_norm": 2.484375, "learning_rate": 1.853935537476808e-05, "loss": 1.7951, "step": 467 }, { "epoch": 0.43253234750462105, "grad_norm": 2.4375, "learning_rate": 1.8531390079416386e-05, "loss": 1.7889, "step": 468 }, { "epoch": 0.433456561922366, "grad_norm": 2.390625, "learning_rate": 1.8523404845412028e-05, "loss": 1.807, "step": 469 }, { "epoch": 0.4343807763401109, "grad_norm": 2.375, "learning_rate": 1.8515399691417236e-05, "loss": 1.7887, "step": 470 }, { "epoch": 0.43530499075785584, "grad_norm": 2.328125, "learning_rate": 1.8507374636140806e-05, "loss": 1.7585, "step": 471 }, { "epoch": 0.43622920517560076, "grad_norm": 2.3125, "learning_rate": 1.8499329698338035e-05, "loss": 1.8336, "step": 472 }, { "epoch": 0.43715341959334564, "grad_norm": 2.5, "learning_rate": 1.8491264896810694e-05, "loss": 1.8155, "step": 473 }, { "epoch": 0.43807763401109057, "grad_norm": 2.4375, "learning_rate": 1.8483180250406973e-05, "loss": 1.8227, "step": 474 }, { "epoch": 0.4390018484288355, "grad_norm": 2.59375, "learning_rate": 1.847507577802144e-05, "loss": 1.8449, "step": 475 }, { "epoch": 0.4399260628465804, "grad_norm": 2.59375, "learning_rate": 1.8466951498595005e-05, "loss": 1.8046, "step": 476 }, { "epoch": 0.4408502772643253, "grad_norm": 2.546875, "learning_rate": 1.8458807431114863e-05, "loss": 1.8108, "step": 477 }, { "epoch": 0.4417744916820702, "grad_norm": 2.328125, "learning_rate": 1.8450643594614463e-05, "loss": 1.7701, "step": 478 }, { "epoch": 0.44269870609981515, "grad_norm": 2.8125, "learning_rate": 1.8442460008173447e-05, "loss": 1.8811, "step": 479 }, { "epoch": 0.4436229205175601, "grad_norm": 2.265625, "learning_rate": 1.8434256690917616e-05, "loss": 1.7186, "step": 480 }, { "epoch": 0.444547134935305, "grad_norm": 2.4375, "learning_rate": 1.842603366201889e-05, "loss": 1.7922, "step": 481 }, { "epoch": 0.4454713493530499, "grad_norm": 2.65625, "learning_rate": 1.841779094069525e-05, "loss": 1.7742, "step": 482 }, { "epoch": 0.4463955637707948, "grad_norm": 2.5, "learning_rate": 1.84095285462107e-05, "loss": 1.7208, "step": 483 }, { "epoch": 0.44731977818853974, "grad_norm": 2.359375, "learning_rate": 1.8401246497875238e-05, "loss": 1.8605, "step": 484 }, { "epoch": 0.44824399260628467, "grad_norm": 2.578125, "learning_rate": 1.8392944815044767e-05, "loss": 1.9009, "step": 485 }, { "epoch": 0.4491682070240296, "grad_norm": 2.671875, "learning_rate": 1.8384623517121095e-05, "loss": 1.8106, "step": 486 }, { "epoch": 0.45009242144177447, "grad_norm": 2.34375, "learning_rate": 1.837628262355188e-05, "loss": 1.7558, "step": 487 }, { "epoch": 0.4510166358595194, "grad_norm": 2.390625, "learning_rate": 1.8367922153830556e-05, "loss": 1.8173, "step": 488 }, { "epoch": 0.4519408502772643, "grad_norm": 2.453125, "learning_rate": 1.835954212749632e-05, "loss": 1.8143, "step": 489 }, { "epoch": 0.45286506469500926, "grad_norm": 2.546875, "learning_rate": 1.835114256413408e-05, "loss": 1.8251, "step": 490 }, { "epoch": 0.4537892791127542, "grad_norm": 2.453125, "learning_rate": 1.834272348337439e-05, "loss": 1.8316, "step": 491 }, { "epoch": 0.45471349353049906, "grad_norm": 2.296875, "learning_rate": 1.833428490489343e-05, "loss": 1.7902, "step": 492 }, { "epoch": 0.455637707948244, "grad_norm": 2.4375, "learning_rate": 1.8325826848412945e-05, "loss": 1.7923, "step": 493 }, { "epoch": 0.4565619223659889, "grad_norm": 2.265625, "learning_rate": 1.8317349333700192e-05, "loss": 1.6527, "step": 494 }, { "epoch": 0.45748613678373384, "grad_norm": 2.359375, "learning_rate": 1.830885238056792e-05, "loss": 1.8055, "step": 495 }, { "epoch": 0.4584103512014787, "grad_norm": 2.40625, "learning_rate": 1.8300336008874297e-05, "loss": 1.7207, "step": 496 }, { "epoch": 0.45933456561922364, "grad_norm": 2.359375, "learning_rate": 1.8291800238522872e-05, "loss": 1.8763, "step": 497 }, { "epoch": 0.4602587800369686, "grad_norm": 2.515625, "learning_rate": 1.828324508946254e-05, "loss": 1.7244, "step": 498 }, { "epoch": 0.4611829944547135, "grad_norm": 2.375, "learning_rate": 1.827467058168748e-05, "loss": 1.8059, "step": 499 }, { "epoch": 0.46210720887245843, "grad_norm": 2.25, "learning_rate": 1.8266076735237113e-05, "loss": 1.7651, "step": 500 }, { "epoch": 0.4630314232902033, "grad_norm": 2.375, "learning_rate": 1.825746357019606e-05, "loss": 1.8413, "step": 501 }, { "epoch": 0.46395563770794823, "grad_norm": 2.296875, "learning_rate": 1.8248831106694088e-05, "loss": 1.7642, "step": 502 }, { "epoch": 0.46487985212569316, "grad_norm": 2.328125, "learning_rate": 1.8240179364906064e-05, "loss": 1.7855, "step": 503 }, { "epoch": 0.4658040665434381, "grad_norm": 2.453125, "learning_rate": 1.8231508365051924e-05, "loss": 1.7445, "step": 504 }, { "epoch": 0.466728280961183, "grad_norm": 2.4375, "learning_rate": 1.822281812739659e-05, "loss": 1.8306, "step": 505 }, { "epoch": 0.4676524953789279, "grad_norm": 2.421875, "learning_rate": 1.8214108672249963e-05, "loss": 1.7766, "step": 506 }, { "epoch": 0.4685767097966728, "grad_norm": 2.5625, "learning_rate": 1.820538001996685e-05, "loss": 1.833, "step": 507 }, { "epoch": 0.46950092421441775, "grad_norm": 2.375, "learning_rate": 1.819663219094692e-05, "loss": 1.7733, "step": 508 }, { "epoch": 0.4704251386321627, "grad_norm": 2.609375, "learning_rate": 1.818786520563467e-05, "loss": 1.7451, "step": 509 }, { "epoch": 0.4713493530499076, "grad_norm": 2.515625, "learning_rate": 1.8179079084519356e-05, "loss": 1.915, "step": 510 }, { "epoch": 0.4722735674676525, "grad_norm": 2.34375, "learning_rate": 1.8170273848134958e-05, "loss": 1.7984, "step": 511 }, { "epoch": 0.4731977818853974, "grad_norm": 2.40625, "learning_rate": 1.8161449517060142e-05, "loss": 1.7826, "step": 512 }, { "epoch": 0.47412199630314233, "grad_norm": 2.515625, "learning_rate": 1.815260611191818e-05, "loss": 1.8685, "step": 513 }, { "epoch": 0.47504621072088726, "grad_norm": 2.40625, "learning_rate": 1.8143743653376944e-05, "loss": 1.7808, "step": 514 }, { "epoch": 0.47597042513863214, "grad_norm": 2.34375, "learning_rate": 1.8134862162148816e-05, "loss": 1.7452, "step": 515 }, { "epoch": 0.47689463955637706, "grad_norm": 2.296875, "learning_rate": 1.8125961658990673e-05, "loss": 1.7051, "step": 516 }, { "epoch": 0.477818853974122, "grad_norm": 2.46875, "learning_rate": 1.8117042164703816e-05, "loss": 1.8248, "step": 517 }, { "epoch": 0.4787430683918669, "grad_norm": 2.53125, "learning_rate": 1.8108103700133936e-05, "loss": 1.8173, "step": 518 }, { "epoch": 0.47966728280961185, "grad_norm": 2.609375, "learning_rate": 1.809914628617105e-05, "loss": 1.7394, "step": 519 }, { "epoch": 0.4805914972273567, "grad_norm": 2.328125, "learning_rate": 1.8090169943749477e-05, "loss": 1.8523, "step": 520 }, { "epoch": 0.48151571164510165, "grad_norm": 2.328125, "learning_rate": 1.8081174693847758e-05, "loss": 1.8535, "step": 521 }, { "epoch": 0.4824399260628466, "grad_norm": 2.390625, "learning_rate": 1.8072160557488632e-05, "loss": 1.8176, "step": 522 }, { "epoch": 0.4833641404805915, "grad_norm": 2.484375, "learning_rate": 1.806312755573898e-05, "loss": 1.8457, "step": 523 }, { "epoch": 0.48428835489833644, "grad_norm": 2.546875, "learning_rate": 1.805407570970976e-05, "loss": 1.8794, "step": 524 }, { "epoch": 0.4852125693160813, "grad_norm": 2.328125, "learning_rate": 1.804500504055598e-05, "loss": 1.7496, "step": 525 }, { "epoch": 0.48613678373382624, "grad_norm": 2.390625, "learning_rate": 1.8035915569476638e-05, "loss": 1.8289, "step": 526 }, { "epoch": 0.48706099815157117, "grad_norm": 2.46875, "learning_rate": 1.802680731771468e-05, "loss": 1.7759, "step": 527 }, { "epoch": 0.4879852125693161, "grad_norm": 2.328125, "learning_rate": 1.8017680306556928e-05, "loss": 1.7268, "step": 528 }, { "epoch": 0.488909426987061, "grad_norm": 2.515625, "learning_rate": 1.8008534557334064e-05, "loss": 1.8667, "step": 529 }, { "epoch": 0.4898336414048059, "grad_norm": 2.484375, "learning_rate": 1.7999370091420554e-05, "loss": 1.8681, "step": 530 }, { "epoch": 0.4907578558225508, "grad_norm": 2.3125, "learning_rate": 1.7990186930234606e-05, "loss": 1.7786, "step": 531 }, { "epoch": 0.49168207024029575, "grad_norm": 2.421875, "learning_rate": 1.7980985095238123e-05, "loss": 1.7993, "step": 532 }, { "epoch": 0.4926062846580407, "grad_norm": 2.484375, "learning_rate": 1.797176460793665e-05, "loss": 1.8106, "step": 533 }, { "epoch": 0.49353049907578556, "grad_norm": 2.40625, "learning_rate": 1.7962525489879324e-05, "loss": 1.7916, "step": 534 }, { "epoch": 0.4944547134935305, "grad_norm": 2.4375, "learning_rate": 1.7953267762658826e-05, "loss": 1.7595, "step": 535 }, { "epoch": 0.4953789279112754, "grad_norm": 2.375, "learning_rate": 1.7943991447911322e-05, "loss": 1.7203, "step": 536 }, { "epoch": 0.49630314232902034, "grad_norm": 2.46875, "learning_rate": 1.7934696567316426e-05, "loss": 1.7498, "step": 537 }, { "epoch": 0.49722735674676527, "grad_norm": 2.546875, "learning_rate": 1.7925383142597136e-05, "loss": 1.826, "step": 538 }, { "epoch": 0.49815157116451014, "grad_norm": 2.546875, "learning_rate": 1.7916051195519797e-05, "loss": 1.8746, "step": 539 }, { "epoch": 0.49907578558225507, "grad_norm": 2.328125, "learning_rate": 1.7906700747894035e-05, "loss": 1.7825, "step": 540 }, { "epoch": 0.5, "grad_norm": 2.53125, "learning_rate": 1.7897331821572716e-05, "loss": 1.7949, "step": 541 }, { "epoch": 0.5009242144177449, "grad_norm": 2.421875, "learning_rate": 1.7887944438451893e-05, "loss": 1.705, "step": 542 }, { "epoch": 0.5018484288354899, "grad_norm": 2.453125, "learning_rate": 1.7878538620470755e-05, "loss": 1.8534, "step": 543 }, { "epoch": 0.5027726432532348, "grad_norm": 2.484375, "learning_rate": 1.7869114389611574e-05, "loss": 1.7709, "step": 544 }, { "epoch": 0.5036968576709797, "grad_norm": 2.46875, "learning_rate": 1.7859671767899655e-05, "loss": 1.798, "step": 545 }, { "epoch": 0.5046210720887245, "grad_norm": 2.296875, "learning_rate": 1.7850210777403283e-05, "loss": 1.8015, "step": 546 }, { "epoch": 0.5055452865064695, "grad_norm": 2.390625, "learning_rate": 1.7840731440233673e-05, "loss": 1.7518, "step": 547 }, { "epoch": 0.5064695009242144, "grad_norm": 2.546875, "learning_rate": 1.783123377854492e-05, "loss": 1.8501, "step": 548 }, { "epoch": 0.5073937153419593, "grad_norm": 2.390625, "learning_rate": 1.7821717814533942e-05, "loss": 1.8816, "step": 549 }, { "epoch": 0.5083179297597042, "grad_norm": 2.34375, "learning_rate": 1.781218357044043e-05, "loss": 1.7582, "step": 550 }, { "epoch": 0.5092421441774492, "grad_norm": 2.4375, "learning_rate": 1.7802631068546805e-05, "loss": 1.7972, "step": 551 }, { "epoch": 0.5101663585951941, "grad_norm": 2.40625, "learning_rate": 1.779306033117815e-05, "loss": 1.8232, "step": 552 }, { "epoch": 0.511090573012939, "grad_norm": 2.296875, "learning_rate": 1.7783471380702167e-05, "loss": 1.7349, "step": 553 }, { "epoch": 0.512014787430684, "grad_norm": 2.328125, "learning_rate": 1.777386423952913e-05, "loss": 1.786, "step": 554 }, { "epoch": 0.5129390018484289, "grad_norm": 2.46875, "learning_rate": 1.7764238930111825e-05, "loss": 1.8225, "step": 555 }, { "epoch": 0.5138632162661737, "grad_norm": 2.375, "learning_rate": 1.7754595474945486e-05, "loss": 1.8403, "step": 556 }, { "epoch": 0.5147874306839186, "grad_norm": 2.25, "learning_rate": 1.7744933896567773e-05, "loss": 1.8103, "step": 557 }, { "epoch": 0.5157116451016636, "grad_norm": 2.25, "learning_rate": 1.773525421755869e-05, "loss": 1.7873, "step": 558 }, { "epoch": 0.5166358595194085, "grad_norm": 2.3125, "learning_rate": 1.7725556460540553e-05, "loss": 1.7976, "step": 559 }, { "epoch": 0.5175600739371534, "grad_norm": 2.265625, "learning_rate": 1.7715840648177916e-05, "loss": 1.7229, "step": 560 }, { "epoch": 0.5184842883548983, "grad_norm": 2.8125, "learning_rate": 1.7706106803177538e-05, "loss": 1.903, "step": 561 }, { "epoch": 0.5194085027726433, "grad_norm": 2.359375, "learning_rate": 1.769635494828833e-05, "loss": 1.8074, "step": 562 }, { "epoch": 0.5203327171903882, "grad_norm": 2.515625, "learning_rate": 1.768658510630127e-05, "loss": 1.781, "step": 563 }, { "epoch": 0.5212569316081331, "grad_norm": 2.4375, "learning_rate": 1.7676797300049395e-05, "loss": 1.8054, "step": 564 }, { "epoch": 0.522181146025878, "grad_norm": 2.515625, "learning_rate": 1.7666991552407723e-05, "loss": 1.7849, "step": 565 }, { "epoch": 0.5231053604436229, "grad_norm": 2.265625, "learning_rate": 1.7657167886293196e-05, "loss": 1.7184, "step": 566 }, { "epoch": 0.5240295748613678, "grad_norm": 2.5625, "learning_rate": 1.7647326324664633e-05, "loss": 1.7871, "step": 567 }, { "epoch": 0.5249537892791127, "grad_norm": 2.703125, "learning_rate": 1.763746689052268e-05, "loss": 1.7195, "step": 568 }, { "epoch": 0.5258780036968577, "grad_norm": 2.46875, "learning_rate": 1.7627589606909756e-05, "loss": 1.7034, "step": 569 }, { "epoch": 0.5268022181146026, "grad_norm": 2.359375, "learning_rate": 1.7617694496909994e-05, "loss": 1.7465, "step": 570 }, { "epoch": 0.5277264325323475, "grad_norm": 2.5, "learning_rate": 1.760778158364918e-05, "loss": 1.789, "step": 571 }, { "epoch": 0.5286506469500925, "grad_norm": 3.015625, "learning_rate": 1.7597850890294718e-05, "loss": 1.7155, "step": 572 }, { "epoch": 0.5295748613678374, "grad_norm": 2.546875, "learning_rate": 1.758790244005556e-05, "loss": 1.7861, "step": 573 }, { "epoch": 0.5304990757855823, "grad_norm": 2.25, "learning_rate": 1.757793625618217e-05, "loss": 1.7542, "step": 574 }, { "epoch": 0.5314232902033271, "grad_norm": 2.4375, "learning_rate": 1.7567952361966435e-05, "loss": 1.7936, "step": 575 }, { "epoch": 0.532347504621072, "grad_norm": 2.5625, "learning_rate": 1.7557950780741652e-05, "loss": 1.7876, "step": 576 }, { "epoch": 0.533271719038817, "grad_norm": 2.609375, "learning_rate": 1.7547931535882445e-05, "loss": 1.8651, "step": 577 }, { "epoch": 0.5341959334565619, "grad_norm": 2.359375, "learning_rate": 1.7537894650804725e-05, "loss": 1.834, "step": 578 }, { "epoch": 0.5351201478743068, "grad_norm": 2.3125, "learning_rate": 1.752784014896562e-05, "loss": 1.7637, "step": 579 }, { "epoch": 0.5360443622920518, "grad_norm": 2.375, "learning_rate": 1.751776805386344e-05, "loss": 1.8324, "step": 580 }, { "epoch": 0.5369685767097967, "grad_norm": 2.203125, "learning_rate": 1.7507678389037613e-05, "loss": 1.7368, "step": 581 }, { "epoch": 0.5378927911275416, "grad_norm": 2.53125, "learning_rate": 1.749757117806862e-05, "loss": 1.7988, "step": 582 }, { "epoch": 0.5388170055452866, "grad_norm": 2.46875, "learning_rate": 1.7487446444577955e-05, "loss": 1.7895, "step": 583 }, { "epoch": 0.5397412199630314, "grad_norm": 2.53125, "learning_rate": 1.747730421222806e-05, "loss": 1.8359, "step": 584 }, { "epoch": 0.5406654343807763, "grad_norm": 2.59375, "learning_rate": 1.746714450472228e-05, "loss": 1.7945, "step": 585 }, { "epoch": 0.5415896487985212, "grad_norm": 2.359375, "learning_rate": 1.7456967345804794e-05, "loss": 1.8113, "step": 586 }, { "epoch": 0.5425138632162662, "grad_norm": 2.34375, "learning_rate": 1.744677275926057e-05, "loss": 1.7825, "step": 587 }, { "epoch": 0.5434380776340111, "grad_norm": 2.515625, "learning_rate": 1.7436560768915312e-05, "loss": 1.7549, "step": 588 }, { "epoch": 0.544362292051756, "grad_norm": 2.3125, "learning_rate": 1.742633139863538e-05, "loss": 1.7137, "step": 589 }, { "epoch": 0.5452865064695009, "grad_norm": 2.28125, "learning_rate": 1.7416084672327774e-05, "loss": 1.7222, "step": 590 }, { "epoch": 0.5462107208872459, "grad_norm": 2.53125, "learning_rate": 1.7405820613940044e-05, "loss": 1.8542, "step": 591 }, { "epoch": 0.5471349353049908, "grad_norm": 2.25, "learning_rate": 1.7395539247460252e-05, "loss": 1.7546, "step": 592 }, { "epoch": 0.5480591497227357, "grad_norm": 2.359375, "learning_rate": 1.7385240596916905e-05, "loss": 1.6853, "step": 593 }, { "epoch": 0.5489833641404805, "grad_norm": 2.328125, "learning_rate": 1.7374924686378906e-05, "loss": 1.8847, "step": 594 }, { "epoch": 0.5499075785582255, "grad_norm": 2.375, "learning_rate": 1.7364591539955503e-05, "loss": 1.84, "step": 595 }, { "epoch": 0.5508317929759704, "grad_norm": 2.265625, "learning_rate": 1.7354241181796218e-05, "loss": 1.715, "step": 596 }, { "epoch": 0.5517560073937153, "grad_norm": 2.53125, "learning_rate": 1.73438736360908e-05, "loss": 1.7637, "step": 597 }, { "epoch": 0.5526802218114603, "grad_norm": 2.40625, "learning_rate": 1.733348892706918e-05, "loss": 1.7145, "step": 598 }, { "epoch": 0.5536044362292052, "grad_norm": 2.453125, "learning_rate": 1.7323087079001372e-05, "loss": 1.7938, "step": 599 }, { "epoch": 0.5545286506469501, "grad_norm": 2.296875, "learning_rate": 1.731266811619747e-05, "loss": 1.7954, "step": 600 }, { "epoch": 0.555452865064695, "grad_norm": 2.5, "learning_rate": 1.730223206300757e-05, "loss": 1.6671, "step": 601 }, { "epoch": 0.55637707948244, "grad_norm": 2.578125, "learning_rate": 1.729177894382168e-05, "loss": 1.8419, "step": 602 }, { "epoch": 0.5573012939001848, "grad_norm": 2.296875, "learning_rate": 1.7281308783069727e-05, "loss": 1.7749, "step": 603 }, { "epoch": 0.5582255083179297, "grad_norm": 2.40625, "learning_rate": 1.7270821605221448e-05, "loss": 1.8182, "step": 604 }, { "epoch": 0.5591497227356746, "grad_norm": 2.515625, "learning_rate": 1.7260317434786357e-05, "loss": 1.8423, "step": 605 }, { "epoch": 0.5600739371534196, "grad_norm": 2.484375, "learning_rate": 1.7249796296313668e-05, "loss": 1.7371, "step": 606 }, { "epoch": 0.5609981515711645, "grad_norm": 2.328125, "learning_rate": 1.723925821439227e-05, "loss": 1.8257, "step": 607 }, { "epoch": 0.5619223659889094, "grad_norm": 2.296875, "learning_rate": 1.7228703213650635e-05, "loss": 1.7477, "step": 608 }, { "epoch": 0.5628465804066544, "grad_norm": 2.28125, "learning_rate": 1.721813131875679e-05, "loss": 1.77, "step": 609 }, { "epoch": 0.5637707948243993, "grad_norm": 2.359375, "learning_rate": 1.720754255441823e-05, "loss": 1.7389, "step": 610 }, { "epoch": 0.5646950092421442, "grad_norm": 2.421875, "learning_rate": 1.7196936945381885e-05, "loss": 1.8286, "step": 611 }, { "epoch": 0.5656192236598891, "grad_norm": 2.390625, "learning_rate": 1.718631451643405e-05, "loss": 1.7894, "step": 612 }, { "epoch": 0.566543438077634, "grad_norm": 2.390625, "learning_rate": 1.717567529240033e-05, "loss": 1.8781, "step": 613 }, { "epoch": 0.5674676524953789, "grad_norm": 2.484375, "learning_rate": 1.7165019298145587e-05, "loss": 1.799, "step": 614 }, { "epoch": 0.5683918669131238, "grad_norm": 2.3125, "learning_rate": 1.7154346558573863e-05, "loss": 1.8503, "step": 615 }, { "epoch": 0.5693160813308688, "grad_norm": 2.28125, "learning_rate": 1.7143657098628353e-05, "loss": 1.7614, "step": 616 }, { "epoch": 0.5702402957486137, "grad_norm": 2.265625, "learning_rate": 1.713295094329131e-05, "loss": 1.7831, "step": 617 }, { "epoch": 0.5711645101663586, "grad_norm": 2.328125, "learning_rate": 1.712222811758402e-05, "loss": 1.8115, "step": 618 }, { "epoch": 0.5720887245841035, "grad_norm": 2.40625, "learning_rate": 1.7111488646566728e-05, "loss": 1.7405, "step": 619 }, { "epoch": 0.5730129390018485, "grad_norm": 2.328125, "learning_rate": 1.7100732555338573e-05, "loss": 1.9411, "step": 620 }, { "epoch": 0.5739371534195934, "grad_norm": 2.28125, "learning_rate": 1.7089959869037544e-05, "loss": 1.7312, "step": 621 }, { "epoch": 0.5748613678373382, "grad_norm": 2.328125, "learning_rate": 1.7079170612840404e-05, "loss": 1.7759, "step": 622 }, { "epoch": 0.5757855822550831, "grad_norm": 2.40625, "learning_rate": 1.7068364811962662e-05, "loss": 1.7094, "step": 623 }, { "epoch": 0.5767097966728281, "grad_norm": 2.390625, "learning_rate": 1.7057542491658467e-05, "loss": 1.7928, "step": 624 }, { "epoch": 0.577634011090573, "grad_norm": 2.265625, "learning_rate": 1.70467036772206e-05, "loss": 1.7901, "step": 625 }, { "epoch": 0.5785582255083179, "grad_norm": 2.328125, "learning_rate": 1.7035848393980372e-05, "loss": 1.8203, "step": 626 }, { "epoch": 0.5794824399260629, "grad_norm": 2.265625, "learning_rate": 1.7024976667307594e-05, "loss": 1.7715, "step": 627 }, { "epoch": 0.5804066543438078, "grad_norm": 2.328125, "learning_rate": 1.70140885226105e-05, "loss": 1.9386, "step": 628 }, { "epoch": 0.5813308687615527, "grad_norm": 2.546875, "learning_rate": 1.70031839853357e-05, "loss": 1.8386, "step": 629 }, { "epoch": 0.5822550831792976, "grad_norm": 2.40625, "learning_rate": 1.6992263080968112e-05, "loss": 1.7751, "step": 630 }, { "epoch": 0.5831792975970426, "grad_norm": 2.359375, "learning_rate": 1.6981325835030905e-05, "loss": 1.7702, "step": 631 }, { "epoch": 0.5841035120147874, "grad_norm": 2.390625, "learning_rate": 1.697037227308544e-05, "loss": 1.8312, "step": 632 }, { "epoch": 0.5850277264325323, "grad_norm": 2.375, "learning_rate": 1.695940242073122e-05, "loss": 1.809, "step": 633 }, { "epoch": 0.5859519408502772, "grad_norm": 2.390625, "learning_rate": 1.6948416303605796e-05, "loss": 1.7757, "step": 634 }, { "epoch": 0.5868761552680222, "grad_norm": 2.484375, "learning_rate": 1.6937413947384756e-05, "loss": 1.7591, "step": 635 }, { "epoch": 0.5878003696857671, "grad_norm": 2.28125, "learning_rate": 1.692639537778163e-05, "loss": 1.8168, "step": 636 }, { "epoch": 0.588724584103512, "grad_norm": 2.21875, "learning_rate": 1.691536062054783e-05, "loss": 1.8131, "step": 637 }, { "epoch": 0.589648798521257, "grad_norm": 2.484375, "learning_rate": 1.6904309701472627e-05, "loss": 1.7659, "step": 638 }, { "epoch": 0.5905730129390019, "grad_norm": 2.328125, "learning_rate": 1.689324264638304e-05, "loss": 1.7739, "step": 639 }, { "epoch": 0.5914972273567468, "grad_norm": 2.328125, "learning_rate": 1.68821594811438e-05, "loss": 1.792, "step": 640 }, { "epoch": 0.5924214417744916, "grad_norm": 2.25, "learning_rate": 1.6871060231657306e-05, "loss": 1.7787, "step": 641 }, { "epoch": 0.5933456561922366, "grad_norm": 2.296875, "learning_rate": 1.6859944923863536e-05, "loss": 1.8021, "step": 642 }, { "epoch": 0.5942698706099815, "grad_norm": 2.40625, "learning_rate": 1.6848813583739995e-05, "loss": 1.698, "step": 643 }, { "epoch": 0.5951940850277264, "grad_norm": 2.265625, "learning_rate": 1.683766623730166e-05, "loss": 1.7306, "step": 644 }, { "epoch": 0.5961182994454713, "grad_norm": 2.3125, "learning_rate": 1.6826502910600924e-05, "loss": 1.8539, "step": 645 }, { "epoch": 0.5970425138632163, "grad_norm": 2.40625, "learning_rate": 1.6815323629727513e-05, "loss": 1.8593, "step": 646 }, { "epoch": 0.5979667282809612, "grad_norm": 2.390625, "learning_rate": 1.680412842080845e-05, "loss": 1.8015, "step": 647 }, { "epoch": 0.5988909426987061, "grad_norm": 2.484375, "learning_rate": 1.679291731000798e-05, "loss": 1.7695, "step": 648 }, { "epoch": 0.5998151571164511, "grad_norm": 2.359375, "learning_rate": 1.6781690323527512e-05, "loss": 1.882, "step": 649 }, { "epoch": 0.600739371534196, "grad_norm": 2.21875, "learning_rate": 1.6770447487605558e-05, "loss": 1.7352, "step": 650 }, { "epoch": 0.6016635859519408, "grad_norm": 2.390625, "learning_rate": 1.6759188828517672e-05, "loss": 1.6697, "step": 651 }, { "epoch": 0.6025878003696857, "grad_norm": 2.25, "learning_rate": 1.6747914372576394e-05, "loss": 1.7724, "step": 652 }, { "epoch": 0.6035120147874307, "grad_norm": 2.296875, "learning_rate": 1.6736624146131165e-05, "loss": 1.7867, "step": 653 }, { "epoch": 0.6044362292051756, "grad_norm": 2.3125, "learning_rate": 1.6725318175568307e-05, "loss": 1.7952, "step": 654 }, { "epoch": 0.6053604436229205, "grad_norm": 2.25, "learning_rate": 1.6713996487310918e-05, "loss": 1.7732, "step": 655 }, { "epoch": 0.6062846580406654, "grad_norm": 2.265625, "learning_rate": 1.670265910781884e-05, "loss": 1.7278, "step": 656 }, { "epoch": 0.6072088724584104, "grad_norm": 2.203125, "learning_rate": 1.6691306063588583e-05, "loss": 1.7756, "step": 657 }, { "epoch": 0.6081330868761553, "grad_norm": 2.296875, "learning_rate": 1.667993738115327e-05, "loss": 1.7691, "step": 658 }, { "epoch": 0.6090573012939002, "grad_norm": 2.390625, "learning_rate": 1.666855308708257e-05, "loss": 1.7943, "step": 659 }, { "epoch": 0.609981515711645, "grad_norm": 2.6875, "learning_rate": 1.6657153207982633e-05, "loss": 1.872, "step": 660 }, { "epoch": 0.61090573012939, "grad_norm": 2.359375, "learning_rate": 1.6645737770496043e-05, "loss": 1.757, "step": 661 }, { "epoch": 0.6118299445471349, "grad_norm": 2.390625, "learning_rate": 1.6634306801301737e-05, "loss": 1.8742, "step": 662 }, { "epoch": 0.6127541589648798, "grad_norm": 2.328125, "learning_rate": 1.6622860327114953e-05, "loss": 1.8074, "step": 663 }, { "epoch": 0.6136783733826248, "grad_norm": 2.328125, "learning_rate": 1.6611398374687172e-05, "loss": 1.753, "step": 664 }, { "epoch": 0.6146025878003697, "grad_norm": 2.375, "learning_rate": 1.6599920970806035e-05, "loss": 1.7479, "step": 665 }, { "epoch": 0.6155268022181146, "grad_norm": 2.390625, "learning_rate": 1.6588428142295313e-05, "loss": 1.8057, "step": 666 }, { "epoch": 0.6164510166358595, "grad_norm": 2.296875, "learning_rate": 1.6576919916014808e-05, "loss": 1.7698, "step": 667 }, { "epoch": 0.6173752310536045, "grad_norm": 2.21875, "learning_rate": 1.656539631886032e-05, "loss": 1.6938, "step": 668 }, { "epoch": 0.6182994454713494, "grad_norm": 2.203125, "learning_rate": 1.6553857377763565e-05, "loss": 1.6874, "step": 669 }, { "epoch": 0.6192236598890942, "grad_norm": 2.40625, "learning_rate": 1.654230311969213e-05, "loss": 1.8034, "step": 670 }, { "epoch": 0.6201478743068392, "grad_norm": 2.34375, "learning_rate": 1.653073357164938e-05, "loss": 1.7782, "step": 671 }, { "epoch": 0.6210720887245841, "grad_norm": 2.28125, "learning_rate": 1.6519148760674435e-05, "loss": 1.7162, "step": 672 }, { "epoch": 0.621996303142329, "grad_norm": 2.375, "learning_rate": 1.650754871384207e-05, "loss": 1.8538, "step": 673 }, { "epoch": 0.6229205175600739, "grad_norm": 2.28125, "learning_rate": 1.649593345826268e-05, "loss": 1.7061, "step": 674 }, { "epoch": 0.6238447319778189, "grad_norm": 2.3125, "learning_rate": 1.648430302108219e-05, "loss": 1.7933, "step": 675 }, { "epoch": 0.6247689463955638, "grad_norm": 2.296875, "learning_rate": 1.6472657429482026e-05, "loss": 1.774, "step": 676 }, { "epoch": 0.6256931608133087, "grad_norm": 2.296875, "learning_rate": 1.6460996710679013e-05, "loss": 1.8082, "step": 677 }, { "epoch": 0.6266173752310537, "grad_norm": 2.34375, "learning_rate": 1.6449320891925334e-05, "loss": 1.767, "step": 678 }, { "epoch": 0.6275415896487985, "grad_norm": 2.328125, "learning_rate": 1.6437630000508466e-05, "loss": 1.7577, "step": 679 }, { "epoch": 0.6284658040665434, "grad_norm": 2.203125, "learning_rate": 1.6425924063751112e-05, "loss": 1.7203, "step": 680 }, { "epoch": 0.6293900184842883, "grad_norm": 2.3125, "learning_rate": 1.6414203109011132e-05, "loss": 1.7882, "step": 681 }, { "epoch": 0.6303142329020333, "grad_norm": 2.265625, "learning_rate": 1.640246716368149e-05, "loss": 1.7407, "step": 682 }, { "epoch": 0.6312384473197782, "grad_norm": 2.3125, "learning_rate": 1.6390716255190186e-05, "loss": 1.8388, "step": 683 }, { "epoch": 0.6321626617375231, "grad_norm": 2.390625, "learning_rate": 1.6378950411000183e-05, "loss": 1.7253, "step": 684 }, { "epoch": 0.633086876155268, "grad_norm": 2.28125, "learning_rate": 1.6367169658609353e-05, "loss": 1.7185, "step": 685 }, { "epoch": 0.634011090573013, "grad_norm": 2.28125, "learning_rate": 1.635537402555042e-05, "loss": 1.8591, "step": 686 }, { "epoch": 0.6349353049907579, "grad_norm": 2.234375, "learning_rate": 1.634356353939086e-05, "loss": 1.764, "step": 687 }, { "epoch": 0.6358595194085028, "grad_norm": 2.515625, "learning_rate": 1.633173822773289e-05, "loss": 1.8386, "step": 688 }, { "epoch": 0.6367837338262476, "grad_norm": 2.390625, "learning_rate": 1.6319898118213365e-05, "loss": 1.7715, "step": 689 }, { "epoch": 0.6377079482439926, "grad_norm": 2.484375, "learning_rate": 1.6308043238503717e-05, "loss": 1.8002, "step": 690 }, { "epoch": 0.6386321626617375, "grad_norm": 2.3125, "learning_rate": 1.6296173616309908e-05, "loss": 1.8008, "step": 691 }, { "epoch": 0.6395563770794824, "grad_norm": 2.390625, "learning_rate": 1.6284289279372343e-05, "loss": 1.7519, "step": 692 }, { "epoch": 0.6404805914972274, "grad_norm": 2.34375, "learning_rate": 1.627239025546583e-05, "loss": 1.7481, "step": 693 }, { "epoch": 0.6414048059149723, "grad_norm": 2.265625, "learning_rate": 1.6260476572399494e-05, "loss": 1.8054, "step": 694 }, { "epoch": 0.6423290203327172, "grad_norm": 2.234375, "learning_rate": 1.6248548258016722e-05, "loss": 1.7836, "step": 695 }, { "epoch": 0.6432532347504621, "grad_norm": 2.390625, "learning_rate": 1.6236605340195093e-05, "loss": 1.8227, "step": 696 }, { "epoch": 0.6441774491682071, "grad_norm": 2.328125, "learning_rate": 1.6224647846846314e-05, "loss": 1.8094, "step": 697 }, { "epoch": 0.6451016635859519, "grad_norm": 2.28125, "learning_rate": 1.621267580591617e-05, "loss": 1.8357, "step": 698 }, { "epoch": 0.6460258780036968, "grad_norm": 2.296875, "learning_rate": 1.6200689245384424e-05, "loss": 1.8122, "step": 699 }, { "epoch": 0.6469500924214417, "grad_norm": 2.3125, "learning_rate": 1.618868819326479e-05, "loss": 1.8756, "step": 700 }, { "epoch": 0.6478743068391867, "grad_norm": 2.484375, "learning_rate": 1.6176672677604844e-05, "loss": 1.7895, "step": 701 }, { "epoch": 0.6487985212569316, "grad_norm": 2.28125, "learning_rate": 1.616464272648596e-05, "loss": 1.7842, "step": 702 }, { "epoch": 0.6497227356746765, "grad_norm": 2.359375, "learning_rate": 1.615259836802326e-05, "loss": 1.6746, "step": 703 }, { "epoch": 0.6506469500924215, "grad_norm": 2.375, "learning_rate": 1.614053963036552e-05, "loss": 1.8615, "step": 704 }, { "epoch": 0.6515711645101664, "grad_norm": 2.3125, "learning_rate": 1.6128466541695144e-05, "loss": 1.7695, "step": 705 }, { "epoch": 0.6524953789279113, "grad_norm": 2.578125, "learning_rate": 1.611637913022806e-05, "loss": 1.7845, "step": 706 }, { "epoch": 0.6534195933456562, "grad_norm": 2.578125, "learning_rate": 1.610427742421367e-05, "loss": 1.7942, "step": 707 }, { "epoch": 0.6543438077634011, "grad_norm": 2.328125, "learning_rate": 1.609216145193479e-05, "loss": 1.8649, "step": 708 }, { "epoch": 0.655268022181146, "grad_norm": 2.25, "learning_rate": 1.608003124170758e-05, "loss": 1.7562, "step": 709 }, { "epoch": 0.6561922365988909, "grad_norm": 2.390625, "learning_rate": 1.606788682188146e-05, "loss": 1.7936, "step": 710 }, { "epoch": 0.6571164510166358, "grad_norm": 2.3125, "learning_rate": 1.6055728220839077e-05, "loss": 1.7874, "step": 711 }, { "epoch": 0.6580406654343808, "grad_norm": 2.265625, "learning_rate": 1.6043555466996206e-05, "loss": 1.7525, "step": 712 }, { "epoch": 0.6589648798521257, "grad_norm": 2.21875, "learning_rate": 1.6031368588801715e-05, "loss": 1.7345, "step": 713 }, { "epoch": 0.6598890942698706, "grad_norm": 2.328125, "learning_rate": 1.601916761473747e-05, "loss": 1.7493, "step": 714 }, { "epoch": 0.6608133086876156, "grad_norm": 2.4375, "learning_rate": 1.600695257331828e-05, "loss": 1.7529, "step": 715 }, { "epoch": 0.6617375231053605, "grad_norm": 2.328125, "learning_rate": 1.5994723493091832e-05, "loss": 1.779, "step": 716 }, { "epoch": 0.6626617375231053, "grad_norm": 2.3125, "learning_rate": 1.598248040263863e-05, "loss": 1.8572, "step": 717 }, { "epoch": 0.6635859519408502, "grad_norm": 2.296875, "learning_rate": 1.5970223330571912e-05, "loss": 1.7661, "step": 718 }, { "epoch": 0.6645101663585952, "grad_norm": 2.28125, "learning_rate": 1.59579523055376e-05, "loss": 1.8277, "step": 719 }, { "epoch": 0.6654343807763401, "grad_norm": 2.265625, "learning_rate": 1.5945667356214216e-05, "loss": 1.8224, "step": 720 }, { "epoch": 0.666358595194085, "grad_norm": 2.265625, "learning_rate": 1.5933368511312836e-05, "loss": 1.7635, "step": 721 }, { "epoch": 0.66728280961183, "grad_norm": 2.265625, "learning_rate": 1.5921055799577004e-05, "loss": 1.8137, "step": 722 }, { "epoch": 0.6682070240295749, "grad_norm": 2.1875, "learning_rate": 1.590872924978267e-05, "loss": 1.6704, "step": 723 }, { "epoch": 0.6691312384473198, "grad_norm": 2.21875, "learning_rate": 1.589638889073813e-05, "loss": 1.8304, "step": 724 }, { "epoch": 0.6700554528650647, "grad_norm": 2.53125, "learning_rate": 1.588403475128395e-05, "loss": 1.8284, "step": 725 }, { "epoch": 0.6709796672828097, "grad_norm": 2.359375, "learning_rate": 1.5871666860292905e-05, "loss": 1.8196, "step": 726 }, { "epoch": 0.6719038817005545, "grad_norm": 2.265625, "learning_rate": 1.5859285246669914e-05, "loss": 1.7755, "step": 727 }, { "epoch": 0.6728280961182994, "grad_norm": 2.421875, "learning_rate": 1.584688993935195e-05, "loss": 1.7778, "step": 728 }, { "epoch": 0.6737523105360443, "grad_norm": 2.421875, "learning_rate": 1.5834480967308005e-05, "loss": 1.727, "step": 729 }, { "epoch": 0.6746765249537893, "grad_norm": 2.3125, "learning_rate": 1.5822058359539002e-05, "loss": 1.7436, "step": 730 }, { "epoch": 0.6756007393715342, "grad_norm": 2.25, "learning_rate": 1.580962214507773e-05, "loss": 1.7105, "step": 731 }, { "epoch": 0.6765249537892791, "grad_norm": 2.296875, "learning_rate": 1.5797172352988775e-05, "loss": 1.8137, "step": 732 }, { "epoch": 0.677449168207024, "grad_norm": 2.359375, "learning_rate": 1.578470901236847e-05, "loss": 1.798, "step": 733 }, { "epoch": 0.678373382624769, "grad_norm": 2.546875, "learning_rate": 1.5772232152344797e-05, "loss": 1.8455, "step": 734 }, { "epoch": 0.6792975970425139, "grad_norm": 2.25, "learning_rate": 1.5759741802077337e-05, "loss": 1.739, "step": 735 }, { "epoch": 0.6802218114602587, "grad_norm": 2.28125, "learning_rate": 1.5747237990757205e-05, "loss": 1.803, "step": 736 }, { "epoch": 0.6811460258780037, "grad_norm": 2.328125, "learning_rate": 1.573472074760697e-05, "loss": 1.7791, "step": 737 }, { "epoch": 0.6820702402957486, "grad_norm": 2.265625, "learning_rate": 1.5722190101880594e-05, "loss": 1.7634, "step": 738 }, { "epoch": 0.6829944547134935, "grad_norm": 2.3125, "learning_rate": 1.570964608286336e-05, "loss": 1.7385, "step": 739 }, { "epoch": 0.6839186691312384, "grad_norm": 2.28125, "learning_rate": 1.5697088719871817e-05, "loss": 1.7355, "step": 740 }, { "epoch": 0.6848428835489834, "grad_norm": 2.3125, "learning_rate": 1.5684518042253677e-05, "loss": 1.7753, "step": 741 }, { "epoch": 0.6857670979667283, "grad_norm": 2.328125, "learning_rate": 1.5671934079387795e-05, "loss": 1.7161, "step": 742 }, { "epoch": 0.6866913123844732, "grad_norm": 2.765625, "learning_rate": 1.5659336860684062e-05, "loss": 1.8047, "step": 743 }, { "epoch": 0.6876155268022182, "grad_norm": 2.59375, "learning_rate": 1.5646726415583344e-05, "loss": 1.7531, "step": 744 }, { "epoch": 0.6885397412199631, "grad_norm": 2.203125, "learning_rate": 1.5634102773557432e-05, "loss": 1.6926, "step": 745 }, { "epoch": 0.6894639556377079, "grad_norm": 2.484375, "learning_rate": 1.5621465964108947e-05, "loss": 1.7061, "step": 746 }, { "epoch": 0.6903881700554528, "grad_norm": 2.6875, "learning_rate": 1.56088160167713e-05, "loss": 1.8197, "step": 747 }, { "epoch": 0.6913123844731978, "grad_norm": 2.515625, "learning_rate": 1.5596152961108577e-05, "loss": 1.7569, "step": 748 }, { "epoch": 0.6922365988909427, "grad_norm": 2.515625, "learning_rate": 1.5583476826715532e-05, "loss": 1.7616, "step": 749 }, { "epoch": 0.6931608133086876, "grad_norm": 2.328125, "learning_rate": 1.5570787643217465e-05, "loss": 1.7631, "step": 750 }, { "epoch": 0.6940850277264325, "grad_norm": 2.671875, "learning_rate": 1.5558085440270177e-05, "loss": 1.6993, "step": 751 }, { "epoch": 0.6950092421441775, "grad_norm": 2.8125, "learning_rate": 1.5545370247559907e-05, "loss": 1.8346, "step": 752 }, { "epoch": 0.6959334565619224, "grad_norm": 2.484375, "learning_rate": 1.553264209480324e-05, "loss": 1.7672, "step": 753 }, { "epoch": 0.6968576709796673, "grad_norm": 2.453125, "learning_rate": 1.5519901011747046e-05, "loss": 1.7772, "step": 754 }, { "epoch": 0.6977818853974121, "grad_norm": 2.3125, "learning_rate": 1.550714702816843e-05, "loss": 1.8091, "step": 755 }, { "epoch": 0.6987060998151571, "grad_norm": 2.265625, "learning_rate": 1.5494380173874638e-05, "loss": 1.7847, "step": 756 }, { "epoch": 0.699630314232902, "grad_norm": 2.578125, "learning_rate": 1.5481600478702995e-05, "loss": 1.7667, "step": 757 }, { "epoch": 0.7005545286506469, "grad_norm": 2.875, "learning_rate": 1.5468807972520837e-05, "loss": 1.9047, "step": 758 }, { "epoch": 0.7014787430683919, "grad_norm": 2.28125, "learning_rate": 1.5456002685225447e-05, "loss": 1.7364, "step": 759 }, { "epoch": 0.7024029574861368, "grad_norm": 2.515625, "learning_rate": 1.5443184646743968e-05, "loss": 1.8143, "step": 760 }, { "epoch": 0.7033271719038817, "grad_norm": 2.46875, "learning_rate": 1.5430353887033352e-05, "loss": 1.7119, "step": 761 }, { "epoch": 0.7042513863216266, "grad_norm": 2.65625, "learning_rate": 1.5417510436080276e-05, "loss": 1.7802, "step": 762 }, { "epoch": 0.7051756007393716, "grad_norm": 2.3125, "learning_rate": 1.5404654323901082e-05, "loss": 1.8215, "step": 763 }, { "epoch": 0.7060998151571165, "grad_norm": 2.265625, "learning_rate": 1.5391785580541697e-05, "loss": 1.8022, "step": 764 }, { "epoch": 0.7070240295748613, "grad_norm": 2.40625, "learning_rate": 1.537890423607758e-05, "loss": 1.8711, "step": 765 }, { "epoch": 0.7079482439926063, "grad_norm": 2.28125, "learning_rate": 1.536601032061362e-05, "loss": 1.7202, "step": 766 }, { "epoch": 0.7088724584103512, "grad_norm": 2.40625, "learning_rate": 1.535310386428411e-05, "loss": 1.7944, "step": 767 }, { "epoch": 0.7097966728280961, "grad_norm": 2.4375, "learning_rate": 1.534018489725263e-05, "loss": 1.8318, "step": 768 }, { "epoch": 0.710720887245841, "grad_norm": 2.328125, "learning_rate": 1.532725344971202e-05, "loss": 1.7648, "step": 769 }, { "epoch": 0.711645101663586, "grad_norm": 2.28125, "learning_rate": 1.5314309551884264e-05, "loss": 1.7047, "step": 770 }, { "epoch": 0.7125693160813309, "grad_norm": 2.359375, "learning_rate": 1.5301353234020462e-05, "loss": 1.7886, "step": 771 }, { "epoch": 0.7134935304990758, "grad_norm": 2.359375, "learning_rate": 1.5288384526400735e-05, "loss": 1.7931, "step": 772 }, { "epoch": 0.7144177449168208, "grad_norm": 2.296875, "learning_rate": 1.5275403459334154e-05, "loss": 1.8331, "step": 773 }, { "epoch": 0.7153419593345656, "grad_norm": 2.296875, "learning_rate": 1.526241006315869e-05, "loss": 1.855, "step": 774 }, { "epoch": 0.7162661737523105, "grad_norm": 2.375, "learning_rate": 1.5249404368241117e-05, "loss": 1.8126, "step": 775 }, { "epoch": 0.7171903881700554, "grad_norm": 2.234375, "learning_rate": 1.5236386404976949e-05, "loss": 1.754, "step": 776 }, { "epoch": 0.7181146025878004, "grad_norm": 2.359375, "learning_rate": 1.5223356203790381e-05, "loss": 1.8277, "step": 777 }, { "epoch": 0.7190388170055453, "grad_norm": 2.453125, "learning_rate": 1.5210313795134203e-05, "loss": 1.7469, "step": 778 }, { "epoch": 0.7199630314232902, "grad_norm": 2.375, "learning_rate": 1.5197259209489748e-05, "loss": 1.8264, "step": 779 }, { "epoch": 0.7208872458410351, "grad_norm": 2.265625, "learning_rate": 1.5184192477366786e-05, "loss": 1.7926, "step": 780 }, { "epoch": 0.7218114602587801, "grad_norm": 2.34375, "learning_rate": 1.517111362930349e-05, "loss": 1.7593, "step": 781 }, { "epoch": 0.722735674676525, "grad_norm": 2.515625, "learning_rate": 1.5158022695866348e-05, "loss": 1.793, "step": 782 }, { "epoch": 0.7236598890942699, "grad_norm": 2.328125, "learning_rate": 1.5144919707650088e-05, "loss": 1.7454, "step": 783 }, { "epoch": 0.7245841035120147, "grad_norm": 2.3125, "learning_rate": 1.5131804695277612e-05, "loss": 1.7493, "step": 784 }, { "epoch": 0.7255083179297597, "grad_norm": 2.28125, "learning_rate": 1.5118677689399923e-05, "loss": 1.7089, "step": 785 }, { "epoch": 0.7264325323475046, "grad_norm": 2.3125, "learning_rate": 1.5105538720696062e-05, "loss": 1.7174, "step": 786 }, { "epoch": 0.7273567467652495, "grad_norm": 2.359375, "learning_rate": 1.5092387819873016e-05, "loss": 1.7641, "step": 787 }, { "epoch": 0.7282809611829945, "grad_norm": 2.4375, "learning_rate": 1.5079225017665668e-05, "loss": 1.7438, "step": 788 }, { "epoch": 0.7292051756007394, "grad_norm": 2.375, "learning_rate": 1.5066050344836708e-05, "loss": 1.7347, "step": 789 }, { "epoch": 0.7301293900184843, "grad_norm": 2.40625, "learning_rate": 1.5052863832176572e-05, "loss": 1.8348, "step": 790 }, { "epoch": 0.7310536044362292, "grad_norm": 2.40625, "learning_rate": 1.5039665510503375e-05, "loss": 1.7957, "step": 791 }, { "epoch": 0.7319778188539742, "grad_norm": 2.3125, "learning_rate": 1.5026455410662814e-05, "loss": 1.8147, "step": 792 }, { "epoch": 0.732902033271719, "grad_norm": 2.3125, "learning_rate": 1.5013233563528129e-05, "loss": 1.8141, "step": 793 }, { "epoch": 0.7338262476894639, "grad_norm": 2.234375, "learning_rate": 1.5000000000000002e-05, "loss": 1.8052, "step": 794 }, { "epoch": 0.7347504621072088, "grad_norm": 2.1875, "learning_rate": 1.4986754751006505e-05, "loss": 1.8449, "step": 795 }, { "epoch": 0.7356746765249538, "grad_norm": 2.390625, "learning_rate": 1.4973497847503018e-05, "loss": 1.7465, "step": 796 }, { "epoch": 0.7365988909426987, "grad_norm": 2.4375, "learning_rate": 1.4960229320472156e-05, "loss": 1.8365, "step": 797 }, { "epoch": 0.7375231053604436, "grad_norm": 2.328125, "learning_rate": 1.4946949200923708e-05, "loss": 1.759, "step": 798 }, { "epoch": 0.7384473197781886, "grad_norm": 2.3125, "learning_rate": 1.4933657519894542e-05, "loss": 1.7429, "step": 799 }, { "epoch": 0.7393715341959335, "grad_norm": 2.375, "learning_rate": 1.4920354308448557e-05, "loss": 1.7776, "step": 800 }, { "epoch": 0.7402957486136784, "grad_norm": 2.34375, "learning_rate": 1.4907039597676598e-05, "loss": 1.7743, "step": 801 }, { "epoch": 0.7412199630314233, "grad_norm": 2.40625, "learning_rate": 1.4893713418696378e-05, "loss": 1.7352, "step": 802 }, { "epoch": 0.7421441774491682, "grad_norm": 2.390625, "learning_rate": 1.4880375802652423e-05, "loss": 1.774, "step": 803 }, { "epoch": 0.7430683918669131, "grad_norm": 2.375, "learning_rate": 1.4867026780715981e-05, "loss": 1.7264, "step": 804 }, { "epoch": 0.743992606284658, "grad_norm": 2.328125, "learning_rate": 1.4853666384084959e-05, "loss": 1.8298, "step": 805 }, { "epoch": 0.744916820702403, "grad_norm": 2.328125, "learning_rate": 1.4840294643983845e-05, "loss": 1.7876, "step": 806 }, { "epoch": 0.7458410351201479, "grad_norm": 2.390625, "learning_rate": 1.4826911591663644e-05, "loss": 1.7342, "step": 807 }, { "epoch": 0.7467652495378928, "grad_norm": 2.375, "learning_rate": 1.4813517258401793e-05, "loss": 1.7839, "step": 808 }, { "epoch": 0.7476894639556377, "grad_norm": 2.328125, "learning_rate": 1.4800111675502095e-05, "loss": 1.7903, "step": 809 }, { "epoch": 0.7486136783733827, "grad_norm": 2.34375, "learning_rate": 1.4786694874294647e-05, "loss": 1.7304, "step": 810 }, { "epoch": 0.7495378927911276, "grad_norm": 2.328125, "learning_rate": 1.4773266886135762e-05, "loss": 1.7142, "step": 811 }, { "epoch": 0.7504621072088724, "grad_norm": 2.28125, "learning_rate": 1.4759827742407893e-05, "loss": 1.6443, "step": 812 }, { "epoch": 0.7513863216266173, "grad_norm": 2.265625, "learning_rate": 1.4746377474519575e-05, "loss": 1.7573, "step": 813 }, { "epoch": 0.7523105360443623, "grad_norm": 2.390625, "learning_rate": 1.4732916113905336e-05, "loss": 1.8038, "step": 814 }, { "epoch": 0.7532347504621072, "grad_norm": 2.21875, "learning_rate": 1.4719443692025628e-05, "loss": 1.7174, "step": 815 }, { "epoch": 0.7541589648798521, "grad_norm": 2.390625, "learning_rate": 1.4705960240366754e-05, "loss": 1.7866, "step": 816 }, { "epoch": 0.755083179297597, "grad_norm": 2.375, "learning_rate": 1.4692465790440794e-05, "loss": 1.835, "step": 817 }, { "epoch": 0.756007393715342, "grad_norm": 2.375, "learning_rate": 1.4678960373785533e-05, "loss": 1.8002, "step": 818 }, { "epoch": 0.7569316081330869, "grad_norm": 2.328125, "learning_rate": 1.4665444021964392e-05, "loss": 1.7899, "step": 819 }, { "epoch": 0.7578558225508318, "grad_norm": 2.25, "learning_rate": 1.465191676656634e-05, "loss": 1.7414, "step": 820 }, { "epoch": 0.7587800369685767, "grad_norm": 2.3125, "learning_rate": 1.4638378639205833e-05, "loss": 1.7971, "step": 821 }, { "epoch": 0.7597042513863216, "grad_norm": 2.4375, "learning_rate": 1.4624829671522732e-05, "loss": 1.6684, "step": 822 }, { "epoch": 0.7606284658040665, "grad_norm": 2.265625, "learning_rate": 1.4611269895182239e-05, "loss": 1.8049, "step": 823 }, { "epoch": 0.7615526802218114, "grad_norm": 2.203125, "learning_rate": 1.4597699341874806e-05, "loss": 1.7747, "step": 824 }, { "epoch": 0.7624768946395564, "grad_norm": 2.3125, "learning_rate": 1.4584118043316088e-05, "loss": 1.8228, "step": 825 }, { "epoch": 0.7634011090573013, "grad_norm": 2.25, "learning_rate": 1.4570526031246844e-05, "loss": 1.7713, "step": 826 }, { "epoch": 0.7643253234750462, "grad_norm": 2.34375, "learning_rate": 1.4556923337432865e-05, "loss": 1.7706, "step": 827 }, { "epoch": 0.7652495378927912, "grad_norm": 2.265625, "learning_rate": 1.4543309993664916e-05, "loss": 1.7494, "step": 828 }, { "epoch": 0.7661737523105361, "grad_norm": 2.265625, "learning_rate": 1.4529686031758642e-05, "loss": 1.8213, "step": 829 }, { "epoch": 0.767097966728281, "grad_norm": 2.234375, "learning_rate": 1.4516051483554524e-05, "loss": 1.6974, "step": 830 }, { "epoch": 0.7680221811460258, "grad_norm": 2.3125, "learning_rate": 1.4502406380917757e-05, "loss": 1.7619, "step": 831 }, { "epoch": 0.7689463955637708, "grad_norm": 2.234375, "learning_rate": 1.4488750755738224e-05, "loss": 1.7384, "step": 832 }, { "epoch": 0.7698706099815157, "grad_norm": 2.25, "learning_rate": 1.4475084639930386e-05, "loss": 1.7818, "step": 833 }, { "epoch": 0.7707948243992606, "grad_norm": 2.359375, "learning_rate": 1.446140806543323e-05, "loss": 1.837, "step": 834 }, { "epoch": 0.7717190388170055, "grad_norm": 2.25, "learning_rate": 1.4447721064210184e-05, "loss": 1.7348, "step": 835 }, { "epoch": 0.7726432532347505, "grad_norm": 2.21875, "learning_rate": 1.4434023668249046e-05, "loss": 1.7972, "step": 836 }, { "epoch": 0.7735674676524954, "grad_norm": 2.25, "learning_rate": 1.4420315909561901e-05, "loss": 1.7634, "step": 837 }, { "epoch": 0.7744916820702403, "grad_norm": 2.5625, "learning_rate": 1.4406597820185063e-05, "loss": 1.7113, "step": 838 }, { "epoch": 0.7754158964879853, "grad_norm": 2.375, "learning_rate": 1.4392869432178974e-05, "loss": 1.6995, "step": 839 }, { "epoch": 0.7763401109057301, "grad_norm": 2.28125, "learning_rate": 1.4379130777628163e-05, "loss": 1.8317, "step": 840 }, { "epoch": 0.777264325323475, "grad_norm": 2.28125, "learning_rate": 1.436538188864114e-05, "loss": 1.7965, "step": 841 }, { "epoch": 0.7781885397412199, "grad_norm": 2.296875, "learning_rate": 1.4351622797350338e-05, "loss": 1.836, "step": 842 }, { "epoch": 0.7791127541589649, "grad_norm": 2.3125, "learning_rate": 1.4337853535912039e-05, "loss": 1.8173, "step": 843 }, { "epoch": 0.7800369685767098, "grad_norm": 2.3125, "learning_rate": 1.4324074136506283e-05, "loss": 1.8036, "step": 844 }, { "epoch": 0.7809611829944547, "grad_norm": 2.34375, "learning_rate": 1.4310284631336812e-05, "loss": 1.7517, "step": 845 }, { "epoch": 0.7818853974121996, "grad_norm": 2.28125, "learning_rate": 1.429648505263098e-05, "loss": 1.7596, "step": 846 }, { "epoch": 0.7828096118299446, "grad_norm": 2.25, "learning_rate": 1.428267543263969e-05, "loss": 1.7631, "step": 847 }, { "epoch": 0.7837338262476895, "grad_norm": 2.265625, "learning_rate": 1.426885580363731e-05, "loss": 1.7933, "step": 848 }, { "epoch": 0.7846580406654344, "grad_norm": 2.265625, "learning_rate": 1.4255026197921596e-05, "loss": 1.7236, "step": 849 }, { "epoch": 0.7855822550831792, "grad_norm": 2.28125, "learning_rate": 1.4241186647813626e-05, "loss": 1.8102, "step": 850 }, { "epoch": 0.7865064695009242, "grad_norm": 2.25, "learning_rate": 1.4227337185657712e-05, "loss": 1.8166, "step": 851 }, { "epoch": 0.7874306839186691, "grad_norm": 2.40625, "learning_rate": 1.4213477843821345e-05, "loss": 1.7751, "step": 852 }, { "epoch": 0.788354898336414, "grad_norm": 2.421875, "learning_rate": 1.4199608654695088e-05, "loss": 1.8383, "step": 853 }, { "epoch": 0.789279112754159, "grad_norm": 2.265625, "learning_rate": 1.4185729650692533e-05, "loss": 1.7932, "step": 854 }, { "epoch": 0.7902033271719039, "grad_norm": 2.25, "learning_rate": 1.41718408642502e-05, "loss": 1.7964, "step": 855 }, { "epoch": 0.7911275415896488, "grad_norm": 2.3125, "learning_rate": 1.4157942327827477e-05, "loss": 1.7234, "step": 856 }, { "epoch": 0.7920517560073937, "grad_norm": 2.328125, "learning_rate": 1.4144034073906539e-05, "loss": 1.8805, "step": 857 }, { "epoch": 0.7929759704251387, "grad_norm": 2.21875, "learning_rate": 1.4130116134992267e-05, "loss": 1.7262, "step": 858 }, { "epoch": 0.7939001848428835, "grad_norm": 2.1875, "learning_rate": 1.4116188543612182e-05, "loss": 1.7126, "step": 859 }, { "epoch": 0.7948243992606284, "grad_norm": 2.25, "learning_rate": 1.410225133231636e-05, "loss": 1.7707, "step": 860 }, { "epoch": 0.7957486136783734, "grad_norm": 2.421875, "learning_rate": 1.408830453367736e-05, "loss": 1.7615, "step": 861 }, { "epoch": 0.7966728280961183, "grad_norm": 2.359375, "learning_rate": 1.4074348180290151e-05, "loss": 1.7847, "step": 862 }, { "epoch": 0.7975970425138632, "grad_norm": 2.265625, "learning_rate": 1.4060382304772028e-05, "loss": 1.7322, "step": 863 }, { "epoch": 0.7985212569316081, "grad_norm": 2.28125, "learning_rate": 1.4046406939762545e-05, "loss": 1.7945, "step": 864 }, { "epoch": 0.7994454713493531, "grad_norm": 2.296875, "learning_rate": 1.4032422117923427e-05, "loss": 1.847, "step": 865 }, { "epoch": 0.800369685767098, "grad_norm": 2.328125, "learning_rate": 1.4018427871938508e-05, "loss": 1.661, "step": 866 }, { "epoch": 0.8012939001848429, "grad_norm": 2.3125, "learning_rate": 1.4004424234513638e-05, "loss": 1.8186, "step": 867 }, { "epoch": 0.8022181146025879, "grad_norm": 2.359375, "learning_rate": 1.3990411238376622e-05, "loss": 1.7442, "step": 868 }, { "epoch": 0.8031423290203327, "grad_norm": 2.28125, "learning_rate": 1.3976388916277141e-05, "loss": 1.7814, "step": 869 }, { "epoch": 0.8040665434380776, "grad_norm": 2.265625, "learning_rate": 1.3962357300986667e-05, "loss": 1.6996, "step": 870 }, { "epoch": 0.8049907578558225, "grad_norm": 2.296875, "learning_rate": 1.3948316425298386e-05, "loss": 1.7575, "step": 871 }, { "epoch": 0.8059149722735675, "grad_norm": 2.15625, "learning_rate": 1.3934266322027132e-05, "loss": 1.6443, "step": 872 }, { "epoch": 0.8068391866913124, "grad_norm": 2.453125, "learning_rate": 1.3920207024009305e-05, "loss": 1.8647, "step": 873 }, { "epoch": 0.8077634011090573, "grad_norm": 2.25, "learning_rate": 1.3906138564102794e-05, "loss": 1.8063, "step": 874 }, { "epoch": 0.8086876155268022, "grad_norm": 2.59375, "learning_rate": 1.3892060975186899e-05, "loss": 1.7265, "step": 875 }, { "epoch": 0.8096118299445472, "grad_norm": 2.25, "learning_rate": 1.3877974290162256e-05, "loss": 1.7838, "step": 876 }, { "epoch": 0.8105360443622921, "grad_norm": 2.3125, "learning_rate": 1.386387854195076e-05, "loss": 1.7849, "step": 877 }, { "epoch": 0.8114602587800369, "grad_norm": 2.359375, "learning_rate": 1.384977376349548e-05, "loss": 1.7519, "step": 878 }, { "epoch": 0.8123844731977818, "grad_norm": 2.3125, "learning_rate": 1.3835659987760605e-05, "loss": 1.8347, "step": 879 }, { "epoch": 0.8133086876155268, "grad_norm": 2.515625, "learning_rate": 1.3821537247731336e-05, "loss": 1.8398, "step": 880 }, { "epoch": 0.8142329020332717, "grad_norm": 2.265625, "learning_rate": 1.3807405576413833e-05, "loss": 1.8579, "step": 881 }, { "epoch": 0.8151571164510166, "grad_norm": 2.28125, "learning_rate": 1.3793265006835129e-05, "loss": 1.7627, "step": 882 }, { "epoch": 0.8160813308687616, "grad_norm": 2.3125, "learning_rate": 1.377911557204305e-05, "loss": 1.7712, "step": 883 }, { "epoch": 0.8170055452865065, "grad_norm": 2.21875, "learning_rate": 1.3764957305106142e-05, "loss": 1.745, "step": 884 }, { "epoch": 0.8179297597042514, "grad_norm": 2.34375, "learning_rate": 1.3750790239113588e-05, "loss": 1.6701, "step": 885 }, { "epoch": 0.8188539741219963, "grad_norm": 2.3125, "learning_rate": 1.3736614407175146e-05, "loss": 1.8181, "step": 886 }, { "epoch": 0.8197781885397413, "grad_norm": 2.359375, "learning_rate": 1.3722429842421055e-05, "loss": 1.8625, "step": 887 }, { "epoch": 0.8207024029574861, "grad_norm": 2.28125, "learning_rate": 1.3708236578001958e-05, "loss": 1.858, "step": 888 }, { "epoch": 0.821626617375231, "grad_norm": 2.390625, "learning_rate": 1.369403464708884e-05, "loss": 1.7923, "step": 889 }, { "epoch": 0.822550831792976, "grad_norm": 2.328125, "learning_rate": 1.3679824082872932e-05, "loss": 1.7148, "step": 890 }, { "epoch": 0.8234750462107209, "grad_norm": 2.375, "learning_rate": 1.3665604918565646e-05, "loss": 1.7588, "step": 891 }, { "epoch": 0.8243992606284658, "grad_norm": 2.25, "learning_rate": 1.3651377187398494e-05, "loss": 1.8226, "step": 892 }, { "epoch": 0.8253234750462107, "grad_norm": 2.40625, "learning_rate": 1.3637140922623007e-05, "loss": 1.8, "step": 893 }, { "epoch": 0.8262476894639557, "grad_norm": 2.28125, "learning_rate": 1.362289615751066e-05, "loss": 1.7712, "step": 894 }, { "epoch": 0.8271719038817006, "grad_norm": 2.296875, "learning_rate": 1.3608642925352794e-05, "loss": 1.7373, "step": 895 }, { "epoch": 0.8280961182994455, "grad_norm": 2.265625, "learning_rate": 1.359438125946054e-05, "loss": 1.8171, "step": 896 }, { "epoch": 0.8290203327171903, "grad_norm": 2.3125, "learning_rate": 1.358011119316474e-05, "loss": 1.7703, "step": 897 }, { "epoch": 0.8299445471349353, "grad_norm": 2.328125, "learning_rate": 1.3565832759815866e-05, "loss": 1.8006, "step": 898 }, { "epoch": 0.8308687615526802, "grad_norm": 2.140625, "learning_rate": 1.3551545992783948e-05, "loss": 1.6905, "step": 899 }, { "epoch": 0.8317929759704251, "grad_norm": 2.25, "learning_rate": 1.3537250925458488e-05, "loss": 1.805, "step": 900 }, { "epoch": 0.83271719038817, "grad_norm": 2.25, "learning_rate": 1.3522947591248388e-05, "loss": 1.7636, "step": 901 }, { "epoch": 0.833641404805915, "grad_norm": 2.3125, "learning_rate": 1.3508636023581871e-05, "loss": 1.7451, "step": 902 }, { "epoch": 0.8345656192236599, "grad_norm": 2.171875, "learning_rate": 1.3494316255906405e-05, "loss": 1.7164, "step": 903 }, { "epoch": 0.8354898336414048, "grad_norm": 2.1875, "learning_rate": 1.3479988321688619e-05, "loss": 1.7113, "step": 904 }, { "epoch": 0.8364140480591498, "grad_norm": 2.25, "learning_rate": 1.3465652254414229e-05, "loss": 1.7954, "step": 905 }, { "epoch": 0.8373382624768947, "grad_norm": 2.34375, "learning_rate": 1.345130808758796e-05, "loss": 1.8064, "step": 906 }, { "epoch": 0.8382624768946395, "grad_norm": 2.25, "learning_rate": 1.343695585473346e-05, "loss": 1.6947, "step": 907 }, { "epoch": 0.8391866913123844, "grad_norm": 2.265625, "learning_rate": 1.3422595589393237e-05, "loss": 1.7491, "step": 908 }, { "epoch": 0.8401109057301294, "grad_norm": 2.25, "learning_rate": 1.3408227325128569e-05, "loss": 1.7537, "step": 909 }, { "epoch": 0.8410351201478743, "grad_norm": 2.296875, "learning_rate": 1.3393851095519424e-05, "loss": 1.8628, "step": 910 }, { "epoch": 0.8419593345656192, "grad_norm": 2.328125, "learning_rate": 1.3379466934164389e-05, "loss": 1.756, "step": 911 }, { "epoch": 0.8428835489833642, "grad_norm": 2.359375, "learning_rate": 1.3365074874680586e-05, "loss": 1.7818, "step": 912 }, { "epoch": 0.8438077634011091, "grad_norm": 2.34375, "learning_rate": 1.3350674950703601e-05, "loss": 1.772, "step": 913 }, { "epoch": 0.844731977818854, "grad_norm": 2.234375, "learning_rate": 1.3336267195887399e-05, "loss": 1.76, "step": 914 }, { "epoch": 0.8456561922365989, "grad_norm": 2.34375, "learning_rate": 1.3321851643904236e-05, "loss": 1.7541, "step": 915 }, { "epoch": 0.8465804066543438, "grad_norm": 2.296875, "learning_rate": 1.3307428328444605e-05, "loss": 1.8326, "step": 916 }, { "epoch": 0.8475046210720887, "grad_norm": 2.296875, "learning_rate": 1.3292997283217133e-05, "loss": 1.8199, "step": 917 }, { "epoch": 0.8484288354898336, "grad_norm": 2.21875, "learning_rate": 1.327855854194852e-05, "loss": 1.7938, "step": 918 }, { "epoch": 0.8493530499075785, "grad_norm": 2.25, "learning_rate": 1.3264112138383445e-05, "loss": 1.805, "step": 919 }, { "epoch": 0.8502772643253235, "grad_norm": 2.25, "learning_rate": 1.32496581062845e-05, "loss": 1.7256, "step": 920 }, { "epoch": 0.8512014787430684, "grad_norm": 2.34375, "learning_rate": 1.3235196479432099e-05, "loss": 1.8061, "step": 921 }, { "epoch": 0.8521256931608133, "grad_norm": 2.265625, "learning_rate": 1.3220727291624415e-05, "loss": 1.7492, "step": 922 }, { "epoch": 0.8530499075785583, "grad_norm": 2.21875, "learning_rate": 1.3206250576677284e-05, "loss": 1.701, "step": 923 }, { "epoch": 0.8539741219963032, "grad_norm": 2.28125, "learning_rate": 1.3191766368424134e-05, "loss": 1.7678, "step": 924 }, { "epoch": 0.8548983364140481, "grad_norm": 2.265625, "learning_rate": 1.3177274700715914e-05, "loss": 1.7748, "step": 925 }, { "epoch": 0.8558225508317929, "grad_norm": 2.203125, "learning_rate": 1.3162775607420995e-05, "loss": 1.7107, "step": 926 }, { "epoch": 0.8567467652495379, "grad_norm": 2.28125, "learning_rate": 1.314826912242511e-05, "loss": 1.7665, "step": 927 }, { "epoch": 0.8576709796672828, "grad_norm": 2.421875, "learning_rate": 1.3133755279631264e-05, "loss": 1.7532, "step": 928 }, { "epoch": 0.8585951940850277, "grad_norm": 2.265625, "learning_rate": 1.3119234112959656e-05, "loss": 1.7546, "step": 929 }, { "epoch": 0.8595194085027726, "grad_norm": 2.296875, "learning_rate": 1.310470565634761e-05, "loss": 1.8021, "step": 930 }, { "epoch": 0.8604436229205176, "grad_norm": 2.25, "learning_rate": 1.3090169943749475e-05, "loss": 1.6847, "step": 931 }, { "epoch": 0.8613678373382625, "grad_norm": 2.203125, "learning_rate": 1.307562700913657e-05, "loss": 1.767, "step": 932 }, { "epoch": 0.8622920517560074, "grad_norm": 2.296875, "learning_rate": 1.3061076886497082e-05, "loss": 1.6855, "step": 933 }, { "epoch": 0.8632162661737524, "grad_norm": 2.296875, "learning_rate": 1.3046519609836002e-05, "loss": 1.7172, "step": 934 }, { "epoch": 0.8641404805914972, "grad_norm": 2.25, "learning_rate": 1.303195521317504e-05, "loss": 1.8457, "step": 935 }, { "epoch": 0.8650646950092421, "grad_norm": 2.296875, "learning_rate": 1.3017383730552553e-05, "loss": 1.669, "step": 936 }, { "epoch": 0.865988909426987, "grad_norm": 2.265625, "learning_rate": 1.3002805196023447e-05, "loss": 1.7659, "step": 937 }, { "epoch": 0.866913123844732, "grad_norm": 2.25, "learning_rate": 1.2988219643659114e-05, "loss": 1.7613, "step": 938 }, { "epoch": 0.8678373382624769, "grad_norm": 2.25, "learning_rate": 1.2973627107547346e-05, "loss": 1.8635, "step": 939 }, { "epoch": 0.8687615526802218, "grad_norm": 2.28125, "learning_rate": 1.2959027621792266e-05, "loss": 1.7871, "step": 940 }, { "epoch": 0.8696857670979667, "grad_norm": 2.40625, "learning_rate": 1.294442122051422e-05, "loss": 1.8043, "step": 941 }, { "epoch": 0.8706099815157117, "grad_norm": 2.28125, "learning_rate": 1.2929807937849735e-05, "loss": 1.7799, "step": 942 }, { "epoch": 0.8715341959334566, "grad_norm": 2.25, "learning_rate": 1.2915187807951405e-05, "loss": 1.791, "step": 943 }, { "epoch": 0.8724584103512015, "grad_norm": 2.34375, "learning_rate": 1.290056086498785e-05, "loss": 1.6993, "step": 944 }, { "epoch": 0.8733826247689463, "grad_norm": 2.25, "learning_rate": 1.288592714314358e-05, "loss": 1.7641, "step": 945 }, { "epoch": 0.8743068391866913, "grad_norm": 2.265625, "learning_rate": 1.287128667661897e-05, "loss": 1.7605, "step": 946 }, { "epoch": 0.8752310536044362, "grad_norm": 2.328125, "learning_rate": 1.285663949963016e-05, "loss": 1.8159, "step": 947 }, { "epoch": 0.8761552680221811, "grad_norm": 2.296875, "learning_rate": 1.2841985646408954e-05, "loss": 1.7955, "step": 948 }, { "epoch": 0.8770794824399261, "grad_norm": 2.234375, "learning_rate": 1.2827325151202783e-05, "loss": 1.717, "step": 949 }, { "epoch": 0.878003696857671, "grad_norm": 2.21875, "learning_rate": 1.281265804827458e-05, "loss": 1.7707, "step": 950 }, { "epoch": 0.8789279112754159, "grad_norm": 2.265625, "learning_rate": 1.2797984371902732e-05, "loss": 1.7301, "step": 951 }, { "epoch": 0.8798521256931608, "grad_norm": 2.328125, "learning_rate": 1.2783304156380989e-05, "loss": 1.7663, "step": 952 }, { "epoch": 0.8807763401109058, "grad_norm": 2.359375, "learning_rate": 1.276861743601838e-05, "loss": 1.8238, "step": 953 }, { "epoch": 0.8817005545286506, "grad_norm": 2.390625, "learning_rate": 1.2753924245139135e-05, "loss": 1.809, "step": 954 }, { "epoch": 0.8826247689463955, "grad_norm": 2.265625, "learning_rate": 1.2739224618082612e-05, "loss": 1.7541, "step": 955 }, { "epoch": 0.8835489833641405, "grad_norm": 2.21875, "learning_rate": 1.2724518589203204e-05, "loss": 1.7941, "step": 956 }, { "epoch": 0.8844731977818854, "grad_norm": 2.203125, "learning_rate": 1.2709806192870272e-05, "loss": 1.682, "step": 957 }, { "epoch": 0.8853974121996303, "grad_norm": 2.203125, "learning_rate": 1.2695087463468054e-05, "loss": 1.7269, "step": 958 }, { "epoch": 0.8863216266173752, "grad_norm": 2.265625, "learning_rate": 1.2680362435395595e-05, "loss": 1.7166, "step": 959 }, { "epoch": 0.8872458410351202, "grad_norm": 2.171875, "learning_rate": 1.2665631143066647e-05, "loss": 1.7366, "step": 960 }, { "epoch": 0.8881700554528651, "grad_norm": 2.3125, "learning_rate": 1.2650893620909621e-05, "loss": 1.765, "step": 961 }, { "epoch": 0.88909426987061, "grad_norm": 2.375, "learning_rate": 1.2636149903367472e-05, "loss": 1.745, "step": 962 }, { "epoch": 0.890018484288355, "grad_norm": 2.34375, "learning_rate": 1.2621400024897642e-05, "loss": 1.7293, "step": 963 }, { "epoch": 0.8909426987060998, "grad_norm": 2.390625, "learning_rate": 1.2606644019971967e-05, "loss": 1.8037, "step": 964 }, { "epoch": 0.8918669131238447, "grad_norm": 2.328125, "learning_rate": 1.259188192307661e-05, "loss": 1.7243, "step": 965 }, { "epoch": 0.8927911275415896, "grad_norm": 2.40625, "learning_rate": 1.2577113768711959e-05, "loss": 1.7914, "step": 966 }, { "epoch": 0.8937153419593346, "grad_norm": 2.234375, "learning_rate": 1.2562339591392571e-05, "loss": 1.7615, "step": 967 }, { "epoch": 0.8946395563770795, "grad_norm": 2.328125, "learning_rate": 1.2547559425647068e-05, "loss": 1.8003, "step": 968 }, { "epoch": 0.8955637707948244, "grad_norm": 2.28125, "learning_rate": 1.2532773306018075e-05, "loss": 1.796, "step": 969 }, { "epoch": 0.8964879852125693, "grad_norm": 2.21875, "learning_rate": 1.2517981267062133e-05, "loss": 1.6651, "step": 970 }, { "epoch": 0.8974121996303143, "grad_norm": 2.234375, "learning_rate": 1.2503183343349612e-05, "loss": 1.8054, "step": 971 }, { "epoch": 0.8983364140480592, "grad_norm": 2.359375, "learning_rate": 1.2488379569464634e-05, "loss": 1.8059, "step": 972 }, { "epoch": 0.899260628465804, "grad_norm": 2.28125, "learning_rate": 1.2473569980005001e-05, "loss": 1.7695, "step": 973 }, { "epoch": 0.9001848428835489, "grad_norm": 2.203125, "learning_rate": 1.2458754609582098e-05, "loss": 1.7513, "step": 974 }, { "epoch": 0.9011090573012939, "grad_norm": 2.328125, "learning_rate": 1.2443933492820826e-05, "loss": 1.6973, "step": 975 }, { "epoch": 0.9020332717190388, "grad_norm": 2.265625, "learning_rate": 1.2429106664359515e-05, "loss": 1.7944, "step": 976 }, { "epoch": 0.9029574861367837, "grad_norm": 2.234375, "learning_rate": 1.2414274158849845e-05, "loss": 1.7366, "step": 977 }, { "epoch": 0.9038817005545287, "grad_norm": 2.5, "learning_rate": 1.2399436010956753e-05, "loss": 1.8152, "step": 978 }, { "epoch": 0.9048059149722736, "grad_norm": 2.28125, "learning_rate": 1.2384592255358385e-05, "loss": 1.7479, "step": 979 }, { "epoch": 0.9057301293900185, "grad_norm": 2.3125, "learning_rate": 1.2369742926745969e-05, "loss": 1.8034, "step": 980 }, { "epoch": 0.9066543438077634, "grad_norm": 2.171875, "learning_rate": 1.2354888059823773e-05, "loss": 1.7525, "step": 981 }, { "epoch": 0.9075785582255084, "grad_norm": 2.328125, "learning_rate": 1.2340027689309e-05, "loss": 1.8198, "step": 982 }, { "epoch": 0.9085027726432532, "grad_norm": 2.234375, "learning_rate": 1.2325161849931726e-05, "loss": 1.7491, "step": 983 }, { "epoch": 0.9094269870609981, "grad_norm": 2.34375, "learning_rate": 1.2310290576434795e-05, "loss": 1.7972, "step": 984 }, { "epoch": 0.910351201478743, "grad_norm": 2.1875, "learning_rate": 1.2295413903573757e-05, "loss": 1.6824, "step": 985 }, { "epoch": 0.911275415896488, "grad_norm": 2.375, "learning_rate": 1.2280531866116783e-05, "loss": 1.7569, "step": 986 }, { "epoch": 0.9121996303142329, "grad_norm": 2.296875, "learning_rate": 1.226564449884458e-05, "loss": 1.7574, "step": 987 }, { "epoch": 0.9131238447319778, "grad_norm": 2.296875, "learning_rate": 1.2250751836550312e-05, "loss": 1.7377, "step": 988 }, { "epoch": 0.9140480591497228, "grad_norm": 2.21875, "learning_rate": 1.2235853914039515e-05, "loss": 1.7504, "step": 989 }, { "epoch": 0.9149722735674677, "grad_norm": 2.28125, "learning_rate": 1.2220950766130017e-05, "loss": 1.7993, "step": 990 }, { "epoch": 0.9158964879852126, "grad_norm": 2.15625, "learning_rate": 1.2206042427651873e-05, "loss": 1.7808, "step": 991 }, { "epoch": 0.9168207024029574, "grad_norm": 2.5, "learning_rate": 1.2191128933447244e-05, "loss": 1.8045, "step": 992 }, { "epoch": 0.9177449168207024, "grad_norm": 2.375, "learning_rate": 1.2176210318370365e-05, "loss": 1.8367, "step": 993 }, { "epoch": 0.9186691312384473, "grad_norm": 2.328125, "learning_rate": 1.216128661728742e-05, "loss": 1.7854, "step": 994 }, { "epoch": 0.9195933456561922, "grad_norm": 2.25, "learning_rate": 1.214635786507649e-05, "loss": 1.8107, "step": 995 }, { "epoch": 0.9205175600739371, "grad_norm": 2.359375, "learning_rate": 1.2131424096627465e-05, "loss": 1.8171, "step": 996 }, { "epoch": 0.9214417744916821, "grad_norm": 2.34375, "learning_rate": 1.2116485346841939e-05, "loss": 1.6712, "step": 997 }, { "epoch": 0.922365988909427, "grad_norm": 2.578125, "learning_rate": 1.2101541650633173e-05, "loss": 1.7513, "step": 998 }, { "epoch": 0.9232902033271719, "grad_norm": 2.203125, "learning_rate": 1.2086593042925964e-05, "loss": 1.697, "step": 999 }, { "epoch": 0.9242144177449169, "grad_norm": 2.25, "learning_rate": 1.2071639558656614e-05, "loss": 1.7592, "step": 1000 }, { "epoch": 0.9251386321626618, "grad_norm": 2.28125, "learning_rate": 1.2056681232772798e-05, "loss": 1.8038, "step": 1001 }, { "epoch": 0.9260628465804066, "grad_norm": 2.375, "learning_rate": 1.2041718100233513e-05, "loss": 1.6955, "step": 1002 }, { "epoch": 0.9269870609981515, "grad_norm": 2.140625, "learning_rate": 1.2026750196008999e-05, "loss": 1.7241, "step": 1003 }, { "epoch": 0.9279112754158965, "grad_norm": 2.25, "learning_rate": 1.2011777555080639e-05, "loss": 1.7432, "step": 1004 }, { "epoch": 0.9288354898336414, "grad_norm": 2.265625, "learning_rate": 1.1996800212440891e-05, "loss": 1.7739, "step": 1005 }, { "epoch": 0.9297597042513863, "grad_norm": 2.1875, "learning_rate": 1.1981818203093193e-05, "loss": 1.7539, "step": 1006 }, { "epoch": 0.9306839186691312, "grad_norm": 2.328125, "learning_rate": 1.1966831562051899e-05, "loss": 1.7583, "step": 1007 }, { "epoch": 0.9316081330868762, "grad_norm": 2.234375, "learning_rate": 1.1951840324342182e-05, "loss": 1.7792, "step": 1008 }, { "epoch": 0.9325323475046211, "grad_norm": 2.21875, "learning_rate": 1.1936844524999966e-05, "loss": 1.8641, "step": 1009 }, { "epoch": 0.933456561922366, "grad_norm": 2.4375, "learning_rate": 1.1921844199071823e-05, "loss": 1.8336, "step": 1010 }, { "epoch": 0.9343807763401109, "grad_norm": 2.28125, "learning_rate": 1.1906839381614913e-05, "loss": 1.6826, "step": 1011 }, { "epoch": 0.9353049907578558, "grad_norm": 2.265625, "learning_rate": 1.189183010769689e-05, "loss": 1.7652, "step": 1012 }, { "epoch": 0.9362292051756007, "grad_norm": 2.296875, "learning_rate": 1.1876816412395825e-05, "loss": 1.818, "step": 1013 }, { "epoch": 0.9371534195933456, "grad_norm": 2.140625, "learning_rate": 1.1861798330800125e-05, "loss": 1.724, "step": 1014 }, { "epoch": 0.9380776340110906, "grad_norm": 2.234375, "learning_rate": 1.1846775898008438e-05, "loss": 1.6802, "step": 1015 }, { "epoch": 0.9390018484288355, "grad_norm": 2.328125, "learning_rate": 1.183174914912959e-05, "loss": 1.7649, "step": 1016 }, { "epoch": 0.9399260628465804, "grad_norm": 2.25, "learning_rate": 1.1816718119282492e-05, "loss": 1.798, "step": 1017 }, { "epoch": 0.9408502772643254, "grad_norm": 2.296875, "learning_rate": 1.1801682843596064e-05, "loss": 1.7898, "step": 1018 }, { "epoch": 0.9417744916820703, "grad_norm": 2.265625, "learning_rate": 1.1786643357209136e-05, "loss": 1.773, "step": 1019 }, { "epoch": 0.9426987060998152, "grad_norm": 2.28125, "learning_rate": 1.1771599695270393e-05, "loss": 1.7778, "step": 1020 }, { "epoch": 0.94362292051756, "grad_norm": 2.203125, "learning_rate": 1.1756551892938273e-05, "loss": 1.7781, "step": 1021 }, { "epoch": 0.944547134935305, "grad_norm": 2.3125, "learning_rate": 1.1741499985380892e-05, "loss": 1.7698, "step": 1022 }, { "epoch": 0.9454713493530499, "grad_norm": 2.21875, "learning_rate": 1.1726444007775954e-05, "loss": 1.7416, "step": 1023 }, { "epoch": 0.9463955637707948, "grad_norm": 2.21875, "learning_rate": 1.171138399531068e-05, "loss": 1.7448, "step": 1024 }, { "epoch": 0.9473197781885397, "grad_norm": 2.265625, "learning_rate": 1.169631998318173e-05, "loss": 1.7282, "step": 1025 }, { "epoch": 0.9482439926062847, "grad_norm": 2.234375, "learning_rate": 1.1681252006595092e-05, "loss": 1.7564, "step": 1026 }, { "epoch": 0.9491682070240296, "grad_norm": 2.390625, "learning_rate": 1.1666180100766035e-05, "loss": 1.7234, "step": 1027 }, { "epoch": 0.9500924214417745, "grad_norm": 2.265625, "learning_rate": 1.1651104300919005e-05, "loss": 1.7624, "step": 1028 }, { "epoch": 0.9510166358595195, "grad_norm": 2.28125, "learning_rate": 1.1636024642287546e-05, "loss": 1.7008, "step": 1029 }, { "epoch": 0.9519408502772643, "grad_norm": 2.375, "learning_rate": 1.162094116011423e-05, "loss": 1.774, "step": 1030 }, { "epoch": 0.9528650646950092, "grad_norm": 2.28125, "learning_rate": 1.1605853889650553e-05, "loss": 1.7911, "step": 1031 }, { "epoch": 0.9537892791127541, "grad_norm": 2.25, "learning_rate": 1.1590762866156873e-05, "loss": 1.7926, "step": 1032 }, { "epoch": 0.9547134935304991, "grad_norm": 2.3125, "learning_rate": 1.1575668124902317e-05, "loss": 1.8248, "step": 1033 }, { "epoch": 0.955637707948244, "grad_norm": 2.265625, "learning_rate": 1.1560569701164696e-05, "loss": 1.6901, "step": 1034 }, { "epoch": 0.9565619223659889, "grad_norm": 2.265625, "learning_rate": 1.1545467630230442e-05, "loss": 1.7256, "step": 1035 }, { "epoch": 0.9574861367837338, "grad_norm": 2.34375, "learning_rate": 1.1530361947394487e-05, "loss": 1.8637, "step": 1036 }, { "epoch": 0.9584103512014788, "grad_norm": 2.1875, "learning_rate": 1.1515252687960226e-05, "loss": 1.7705, "step": 1037 }, { "epoch": 0.9593345656192237, "grad_norm": 2.25, "learning_rate": 1.1500139887239401e-05, "loss": 1.7753, "step": 1038 }, { "epoch": 0.9602587800369686, "grad_norm": 2.21875, "learning_rate": 1.1485023580552039e-05, "loss": 1.6853, "step": 1039 }, { "epoch": 0.9611829944547134, "grad_norm": 2.296875, "learning_rate": 1.1469903803226351e-05, "loss": 1.7484, "step": 1040 }, { "epoch": 0.9621072088724584, "grad_norm": 2.25, "learning_rate": 1.1454780590598662e-05, "loss": 1.7542, "step": 1041 }, { "epoch": 0.9630314232902033, "grad_norm": 2.328125, "learning_rate": 1.1439653978013334e-05, "loss": 1.8801, "step": 1042 }, { "epoch": 0.9639556377079482, "grad_norm": 2.328125, "learning_rate": 1.1424524000822663e-05, "loss": 1.7408, "step": 1043 }, { "epoch": 0.9648798521256932, "grad_norm": 2.3125, "learning_rate": 1.1409390694386818e-05, "loss": 1.7599, "step": 1044 }, { "epoch": 0.9658040665434381, "grad_norm": 2.359375, "learning_rate": 1.139425409407374e-05, "loss": 1.7153, "step": 1045 }, { "epoch": 0.966728280961183, "grad_norm": 2.1875, "learning_rate": 1.1379114235259077e-05, "loss": 1.663, "step": 1046 }, { "epoch": 0.967652495378928, "grad_norm": 2.25, "learning_rate": 1.1363971153326085e-05, "loss": 1.7666, "step": 1047 }, { "epoch": 0.9685767097966729, "grad_norm": 2.296875, "learning_rate": 1.134882488366556e-05, "loss": 1.7926, "step": 1048 }, { "epoch": 0.9695009242144177, "grad_norm": 2.28125, "learning_rate": 1.133367546167574e-05, "loss": 1.7318, "step": 1049 }, { "epoch": 0.9704251386321626, "grad_norm": 2.3125, "learning_rate": 1.1318522922762233e-05, "loss": 1.7762, "step": 1050 }, { "epoch": 0.9713493530499075, "grad_norm": 2.359375, "learning_rate": 1.1303367302337937e-05, "loss": 1.7529, "step": 1051 }, { "epoch": 0.9722735674676525, "grad_norm": 2.375, "learning_rate": 1.1288208635822947e-05, "loss": 1.7774, "step": 1052 }, { "epoch": 0.9731977818853974, "grad_norm": 2.203125, "learning_rate": 1.1273046958644476e-05, "loss": 1.6978, "step": 1053 }, { "epoch": 0.9741219963031423, "grad_norm": 2.625, "learning_rate": 1.1257882306236776e-05, "loss": 1.8488, "step": 1054 }, { "epoch": 0.9750462107208873, "grad_norm": 2.296875, "learning_rate": 1.1242714714041046e-05, "loss": 1.8155, "step": 1055 }, { "epoch": 0.9759704251386322, "grad_norm": 2.3125, "learning_rate": 1.1227544217505368e-05, "loss": 1.741, "step": 1056 }, { "epoch": 0.9768946395563771, "grad_norm": 2.140625, "learning_rate": 1.1212370852084603e-05, "loss": 1.8001, "step": 1057 }, { "epoch": 0.977818853974122, "grad_norm": 2.234375, "learning_rate": 1.1197194653240314e-05, "loss": 1.7306, "step": 1058 }, { "epoch": 0.9787430683918669, "grad_norm": 2.203125, "learning_rate": 1.1182015656440692e-05, "loss": 1.7079, "step": 1059 }, { "epoch": 0.9796672828096118, "grad_norm": 2.203125, "learning_rate": 1.1166833897160465e-05, "loss": 1.8404, "step": 1060 }, { "epoch": 0.9805914972273567, "grad_norm": 2.28125, "learning_rate": 1.1151649410880823e-05, "loss": 1.7201, "step": 1061 }, { "epoch": 0.9815157116451017, "grad_norm": 2.265625, "learning_rate": 1.1136462233089318e-05, "loss": 1.763, "step": 1062 }, { "epoch": 0.9824399260628466, "grad_norm": 2.453125, "learning_rate": 1.1121272399279795e-05, "loss": 1.7126, "step": 1063 }, { "epoch": 0.9833641404805915, "grad_norm": 2.25, "learning_rate": 1.1106079944952317e-05, "loss": 1.7772, "step": 1064 }, { "epoch": 0.9842883548983364, "grad_norm": 2.3125, "learning_rate": 1.109088490561306e-05, "loss": 1.7543, "step": 1065 }, { "epoch": 0.9852125693160814, "grad_norm": 2.171875, "learning_rate": 1.1075687316774246e-05, "loss": 1.6266, "step": 1066 }, { "epoch": 0.9861367837338263, "grad_norm": 2.234375, "learning_rate": 1.1060487213954053e-05, "loss": 1.8199, "step": 1067 }, { "epoch": 0.9870609981515711, "grad_norm": 2.25, "learning_rate": 1.1045284632676535e-05, "loss": 1.6765, "step": 1068 }, { "epoch": 0.987985212569316, "grad_norm": 2.3125, "learning_rate": 1.1030079608471544e-05, "loss": 1.7144, "step": 1069 }, { "epoch": 0.988909426987061, "grad_norm": 2.234375, "learning_rate": 1.1014872176874635e-05, "loss": 1.7569, "step": 1070 }, { "epoch": 0.9898336414048059, "grad_norm": 2.375, "learning_rate": 1.099966237342699e-05, "loss": 1.7365, "step": 1071 }, { "epoch": 0.9907578558225508, "grad_norm": 2.1875, "learning_rate": 1.0984450233675334e-05, "loss": 1.7409, "step": 1072 }, { "epoch": 0.9916820702402958, "grad_norm": 2.34375, "learning_rate": 1.0969235793171858e-05, "loss": 1.7435, "step": 1073 }, { "epoch": 0.9926062846580407, "grad_norm": 2.203125, "learning_rate": 1.0954019087474123e-05, "loss": 1.749, "step": 1074 }, { "epoch": 0.9935304990757856, "grad_norm": 2.4375, "learning_rate": 1.0938800152144984e-05, "loss": 1.8421, "step": 1075 }, { "epoch": 0.9944547134935305, "grad_norm": 2.234375, "learning_rate": 1.092357902275251e-05, "loss": 1.6558, "step": 1076 }, { "epoch": 0.9953789279112755, "grad_norm": 2.21875, "learning_rate": 1.09083557348699e-05, "loss": 1.7726, "step": 1077 }, { "epoch": 0.9963031423290203, "grad_norm": 2.203125, "learning_rate": 1.0893130324075396e-05, "loss": 1.7757, "step": 1078 }, { "epoch": 0.9972273567467652, "grad_norm": 2.296875, "learning_rate": 1.0877902825952198e-05, "loss": 1.7686, "step": 1079 }, { "epoch": 0.9981515711645101, "grad_norm": 2.3125, "learning_rate": 1.086267327608838e-05, "loss": 1.681, "step": 1080 }, { "epoch": 0.9990757855822551, "grad_norm": 2.234375, "learning_rate": 1.0847441710076826e-05, "loss": 1.7529, "step": 1081 }, { "epoch": 1.0, "grad_norm": 2.46875, "learning_rate": 1.0832208163515123e-05, "loss": 1.7201, "step": 1082 }, { "epoch": 1.0009242144177448, "grad_norm": 2.375, "learning_rate": 1.0816972672005483e-05, "loss": 1.7402, "step": 1083 }, { "epoch": 1.0018484288354899, "grad_norm": 2.25, "learning_rate": 1.080173527115467e-05, "loss": 1.6441, "step": 1084 }, { "epoch": 1.0027726432532347, "grad_norm": 2.140625, "learning_rate": 1.0786495996573907e-05, "loss": 1.6349, "step": 1085 }, { "epoch": 1.0036968576709797, "grad_norm": 2.1875, "learning_rate": 1.0771254883878796e-05, "loss": 1.6627, "step": 1086 }, { "epoch": 1.0046210720887245, "grad_norm": 2.234375, "learning_rate": 1.0756011968689242e-05, "loss": 1.6892, "step": 1087 }, { "epoch": 1.0055452865064696, "grad_norm": 2.421875, "learning_rate": 1.074076728662935e-05, "loss": 1.6846, "step": 1088 }, { "epoch": 1.0064695009242144, "grad_norm": 2.3125, "learning_rate": 1.0725520873327361e-05, "loss": 1.7464, "step": 1089 }, { "epoch": 1.0073937153419594, "grad_norm": 2.265625, "learning_rate": 1.0710272764415566e-05, "loss": 1.6771, "step": 1090 }, { "epoch": 1.0083179297597042, "grad_norm": 2.296875, "learning_rate": 1.069502299553021e-05, "loss": 1.5688, "step": 1091 }, { "epoch": 1.009242144177449, "grad_norm": 2.25, "learning_rate": 1.0679771602311429e-05, "loss": 1.6627, "step": 1092 }, { "epoch": 1.010166358595194, "grad_norm": 2.21875, "learning_rate": 1.0664518620403143e-05, "loss": 1.7261, "step": 1093 }, { "epoch": 1.011090573012939, "grad_norm": 2.296875, "learning_rate": 1.0649264085452988e-05, "loss": 1.7151, "step": 1094 }, { "epoch": 1.012014787430684, "grad_norm": 2.203125, "learning_rate": 1.063400803311224e-05, "loss": 1.6986, "step": 1095 }, { "epoch": 1.0129390018484288, "grad_norm": 2.3125, "learning_rate": 1.0618750499035711e-05, "loss": 1.6223, "step": 1096 }, { "epoch": 1.0138632162661738, "grad_norm": 2.21875, "learning_rate": 1.060349151888167e-05, "loss": 1.723, "step": 1097 }, { "epoch": 1.0147874306839186, "grad_norm": 2.265625, "learning_rate": 1.0588231128311783e-05, "loss": 1.5908, "step": 1098 }, { "epoch": 1.0157116451016637, "grad_norm": 2.296875, "learning_rate": 1.0572969362991e-05, "loss": 1.6652, "step": 1099 }, { "epoch": 1.0166358595194085, "grad_norm": 2.25, "learning_rate": 1.0557706258587489e-05, "loss": 1.6731, "step": 1100 }, { "epoch": 1.0175600739371533, "grad_norm": 2.296875, "learning_rate": 1.0542441850772542e-05, "loss": 1.6589, "step": 1101 }, { "epoch": 1.0184842883548983, "grad_norm": 2.21875, "learning_rate": 1.0527176175220499e-05, "loss": 1.7045, "step": 1102 }, { "epoch": 1.0194085027726432, "grad_norm": 2.28125, "learning_rate": 1.0511909267608674e-05, "loss": 1.6563, "step": 1103 }, { "epoch": 1.0203327171903882, "grad_norm": 2.375, "learning_rate": 1.049664116361724e-05, "loss": 1.6612, "step": 1104 }, { "epoch": 1.021256931608133, "grad_norm": 2.25, "learning_rate": 1.0481371898929187e-05, "loss": 1.6899, "step": 1105 }, { "epoch": 1.022181146025878, "grad_norm": 2.171875, "learning_rate": 1.04661015092302e-05, "loss": 1.5654, "step": 1106 }, { "epoch": 1.0231053604436229, "grad_norm": 2.265625, "learning_rate": 1.0450830030208604e-05, "loss": 1.6492, "step": 1107 }, { "epoch": 1.024029574861368, "grad_norm": 2.234375, "learning_rate": 1.0435557497555267e-05, "loss": 1.6367, "step": 1108 }, { "epoch": 1.0249537892791127, "grad_norm": 2.28125, "learning_rate": 1.042028394696352e-05, "loss": 1.6599, "step": 1109 }, { "epoch": 1.0258780036968578, "grad_norm": 2.265625, "learning_rate": 1.0405009414129073e-05, "loss": 1.6892, "step": 1110 }, { "epoch": 1.0268022181146026, "grad_norm": 2.3125, "learning_rate": 1.0389733934749924e-05, "loss": 1.7044, "step": 1111 }, { "epoch": 1.0277264325323474, "grad_norm": 2.25, "learning_rate": 1.03744575445263e-05, "loss": 1.7083, "step": 1112 }, { "epoch": 1.0286506469500925, "grad_norm": 2.265625, "learning_rate": 1.0359180279160544e-05, "loss": 1.7404, "step": 1113 }, { "epoch": 1.0295748613678373, "grad_norm": 2.296875, "learning_rate": 1.034390217435704e-05, "loss": 1.6475, "step": 1114 }, { "epoch": 1.0304990757855823, "grad_norm": 2.171875, "learning_rate": 1.0328623265822148e-05, "loss": 1.6842, "step": 1115 }, { "epoch": 1.0314232902033271, "grad_norm": 2.1875, "learning_rate": 1.0313343589264097e-05, "loss": 1.6968, "step": 1116 }, { "epoch": 1.0323475046210722, "grad_norm": 2.25, "learning_rate": 1.0298063180392916e-05, "loss": 1.5967, "step": 1117 }, { "epoch": 1.033271719038817, "grad_norm": 2.265625, "learning_rate": 1.028278207492034e-05, "loss": 1.6485, "step": 1118 }, { "epoch": 1.034195933456562, "grad_norm": 2.328125, "learning_rate": 1.0267500308559733e-05, "loss": 1.7137, "step": 1119 }, { "epoch": 1.0351201478743068, "grad_norm": 2.140625, "learning_rate": 1.025221791702601e-05, "loss": 1.6419, "step": 1120 }, { "epoch": 1.0360443622920517, "grad_norm": 2.265625, "learning_rate": 1.0236934936035539e-05, "loss": 1.6602, "step": 1121 }, { "epoch": 1.0369685767097967, "grad_norm": 2.15625, "learning_rate": 1.022165140130607e-05, "loss": 1.6326, "step": 1122 }, { "epoch": 1.0378927911275415, "grad_norm": 2.125, "learning_rate": 1.0206367348556647e-05, "loss": 1.5986, "step": 1123 }, { "epoch": 1.0388170055452866, "grad_norm": 2.21875, "learning_rate": 1.0191082813507521e-05, "loss": 1.7337, "step": 1124 }, { "epoch": 1.0397412199630314, "grad_norm": 2.359375, "learning_rate": 1.0175797831880073e-05, "loss": 1.7409, "step": 1125 }, { "epoch": 1.0406654343807764, "grad_norm": 2.25, "learning_rate": 1.0160512439396735e-05, "loss": 1.6662, "step": 1126 }, { "epoch": 1.0415896487985212, "grad_norm": 2.1875, "learning_rate": 1.0145226671780885e-05, "loss": 1.655, "step": 1127 }, { "epoch": 1.0425138632162663, "grad_norm": 2.203125, "learning_rate": 1.0129940564756784e-05, "loss": 1.663, "step": 1128 }, { "epoch": 1.043438077634011, "grad_norm": 2.296875, "learning_rate": 1.011465415404949e-05, "loss": 1.7817, "step": 1129 }, { "epoch": 1.044362292051756, "grad_norm": 2.234375, "learning_rate": 1.009936747538477e-05, "loss": 1.7201, "step": 1130 }, { "epoch": 1.045286506469501, "grad_norm": 2.28125, "learning_rate": 1.0084080564489006e-05, "loss": 1.703, "step": 1131 }, { "epoch": 1.0462107208872458, "grad_norm": 2.21875, "learning_rate": 1.0068793457089141e-05, "loss": 1.7028, "step": 1132 }, { "epoch": 1.0471349353049908, "grad_norm": 2.265625, "learning_rate": 1.0053506188912563e-05, "loss": 1.6678, "step": 1133 }, { "epoch": 1.0480591497227356, "grad_norm": 2.21875, "learning_rate": 1.003821879568704e-05, "loss": 1.7261, "step": 1134 }, { "epoch": 1.0489833641404807, "grad_norm": 2.203125, "learning_rate": 1.0022931313140639e-05, "loss": 1.5828, "step": 1135 }, { "epoch": 1.0499075785582255, "grad_norm": 2.234375, "learning_rate": 1.0007643777001618e-05, "loss": 1.7158, "step": 1136 }, { "epoch": 1.0508317929759705, "grad_norm": 2.234375, "learning_rate": 9.992356222998383e-06, "loss": 1.7005, "step": 1137 }, { "epoch": 1.0517560073937153, "grad_norm": 2.203125, "learning_rate": 9.977068686859366e-06, "loss": 1.7228, "step": 1138 }, { "epoch": 1.0526802218114604, "grad_norm": 2.1875, "learning_rate": 9.961781204312961e-06, "loss": 1.6846, "step": 1139 }, { "epoch": 1.0536044362292052, "grad_norm": 2.171875, "learning_rate": 9.946493811087438e-06, "loss": 1.6803, "step": 1140 }, { "epoch": 1.05452865064695, "grad_norm": 2.296875, "learning_rate": 9.93120654291086e-06, "loss": 1.7282, "step": 1141 }, { "epoch": 1.055452865064695, "grad_norm": 2.203125, "learning_rate": 9.915919435510995e-06, "loss": 1.6489, "step": 1142 }, { "epoch": 1.0563770794824399, "grad_norm": 2.171875, "learning_rate": 9.900632524615235e-06, "loss": 1.7029, "step": 1143 }, { "epoch": 1.057301293900185, "grad_norm": 2.1875, "learning_rate": 9.88534584595051e-06, "loss": 1.6978, "step": 1144 }, { "epoch": 1.0582255083179297, "grad_norm": 2.171875, "learning_rate": 9.870059435243216e-06, "loss": 1.6167, "step": 1145 }, { "epoch": 1.0591497227356748, "grad_norm": 2.203125, "learning_rate": 9.854773328219118e-06, "loss": 1.752, "step": 1146 }, { "epoch": 1.0600739371534196, "grad_norm": 2.265625, "learning_rate": 9.839487560603266e-06, "loss": 1.688, "step": 1147 }, { "epoch": 1.0609981515711646, "grad_norm": 2.15625, "learning_rate": 9.824202168119928e-06, "loss": 1.636, "step": 1148 }, { "epoch": 1.0619223659889094, "grad_norm": 2.234375, "learning_rate": 9.80891718649248e-06, "loss": 1.7069, "step": 1149 }, { "epoch": 1.0628465804066543, "grad_norm": 2.234375, "learning_rate": 9.793632651443356e-06, "loss": 1.7015, "step": 1150 }, { "epoch": 1.0637707948243993, "grad_norm": 2.140625, "learning_rate": 9.778348598693933e-06, "loss": 1.6519, "step": 1151 }, { "epoch": 1.064695009242144, "grad_norm": 2.203125, "learning_rate": 9.763065063964465e-06, "loss": 1.6563, "step": 1152 }, { "epoch": 1.0656192236598891, "grad_norm": 2.203125, "learning_rate": 9.747782082973995e-06, "loss": 1.701, "step": 1153 }, { "epoch": 1.066543438077634, "grad_norm": 2.203125, "learning_rate": 9.732499691440267e-06, "loss": 1.696, "step": 1154 }, { "epoch": 1.067467652495379, "grad_norm": 2.1875, "learning_rate": 9.717217925079663e-06, "loss": 1.6334, "step": 1155 }, { "epoch": 1.0683918669131238, "grad_norm": 2.171875, "learning_rate": 9.701936819607086e-06, "loss": 1.6638, "step": 1156 }, { "epoch": 1.0693160813308689, "grad_norm": 2.3125, "learning_rate": 9.686656410735906e-06, "loss": 1.7437, "step": 1157 }, { "epoch": 1.0702402957486137, "grad_norm": 2.28125, "learning_rate": 9.671376734177857e-06, "loss": 1.7062, "step": 1158 }, { "epoch": 1.0711645101663585, "grad_norm": 2.1875, "learning_rate": 9.65609782564296e-06, "loss": 1.6728, "step": 1159 }, { "epoch": 1.0720887245841035, "grad_norm": 2.234375, "learning_rate": 9.64081972083946e-06, "loss": 1.7693, "step": 1160 }, { "epoch": 1.0730129390018484, "grad_norm": 2.21875, "learning_rate": 9.625542455473703e-06, "loss": 1.6582, "step": 1161 }, { "epoch": 1.0739371534195934, "grad_norm": 2.265625, "learning_rate": 9.610266065250077e-06, "loss": 1.6998, "step": 1162 }, { "epoch": 1.0748613678373382, "grad_norm": 2.171875, "learning_rate": 9.594990585870934e-06, "loss": 1.7135, "step": 1163 }, { "epoch": 1.0757855822550833, "grad_norm": 2.28125, "learning_rate": 9.57971605303648e-06, "loss": 1.7311, "step": 1164 }, { "epoch": 1.076709796672828, "grad_norm": 2.21875, "learning_rate": 9.564442502444734e-06, "loss": 1.6864, "step": 1165 }, { "epoch": 1.077634011090573, "grad_norm": 2.203125, "learning_rate": 9.5491699697914e-06, "loss": 1.7015, "step": 1166 }, { "epoch": 1.078558225508318, "grad_norm": 2.171875, "learning_rate": 9.533898490769804e-06, "loss": 1.6583, "step": 1167 }, { "epoch": 1.0794824399260627, "grad_norm": 2.171875, "learning_rate": 9.518628101070818e-06, "loss": 1.6718, "step": 1168 }, { "epoch": 1.0804066543438078, "grad_norm": 2.265625, "learning_rate": 9.503358836382761e-06, "loss": 1.7867, "step": 1169 }, { "epoch": 1.0813308687615526, "grad_norm": 2.21875, "learning_rate": 9.488090732391331e-06, "loss": 1.6962, "step": 1170 }, { "epoch": 1.0822550831792976, "grad_norm": 2.21875, "learning_rate": 9.472823824779504e-06, "loss": 1.7186, "step": 1171 }, { "epoch": 1.0831792975970425, "grad_norm": 2.34375, "learning_rate": 9.457558149227463e-06, "loss": 1.6749, "step": 1172 }, { "epoch": 1.0841035120147875, "grad_norm": 2.25, "learning_rate": 9.442293741412516e-06, "loss": 1.7142, "step": 1173 }, { "epoch": 1.0850277264325323, "grad_norm": 2.234375, "learning_rate": 9.427030637009002e-06, "loss": 1.7111, "step": 1174 }, { "epoch": 1.0859519408502774, "grad_norm": 2.328125, "learning_rate": 9.411768871688219e-06, "loss": 1.6949, "step": 1175 }, { "epoch": 1.0868761552680222, "grad_norm": 2.171875, "learning_rate": 9.396508481118333e-06, "loss": 1.7108, "step": 1176 }, { "epoch": 1.087800369685767, "grad_norm": 2.171875, "learning_rate": 9.381249500964294e-06, "loss": 1.663, "step": 1177 }, { "epoch": 1.088724584103512, "grad_norm": 2.203125, "learning_rate": 9.36599196688776e-06, "loss": 1.7298, "step": 1178 }, { "epoch": 1.0896487985212568, "grad_norm": 2.25, "learning_rate": 9.350735914547012e-06, "loss": 1.6764, "step": 1179 }, { "epoch": 1.0905730129390019, "grad_norm": 2.1875, "learning_rate": 9.33548137959686e-06, "loss": 1.6757, "step": 1180 }, { "epoch": 1.0914972273567467, "grad_norm": 2.3125, "learning_rate": 9.320228397688575e-06, "loss": 1.7313, "step": 1181 }, { "epoch": 1.0924214417744917, "grad_norm": 2.140625, "learning_rate": 9.304977004469791e-06, "loss": 1.6382, "step": 1182 }, { "epoch": 1.0933456561922366, "grad_norm": 2.203125, "learning_rate": 9.289727235584436e-06, "loss": 1.6057, "step": 1183 }, { "epoch": 1.0942698706099816, "grad_norm": 2.1875, "learning_rate": 9.27447912667264e-06, "loss": 1.7057, "step": 1184 }, { "epoch": 1.0951940850277264, "grad_norm": 2.28125, "learning_rate": 9.259232713370654e-06, "loss": 1.7117, "step": 1185 }, { "epoch": 1.0961182994454712, "grad_norm": 2.25, "learning_rate": 9.243988031310761e-06, "loss": 1.6676, "step": 1186 }, { "epoch": 1.0970425138632163, "grad_norm": 2.1875, "learning_rate": 9.228745116121205e-06, "loss": 1.6438, "step": 1187 }, { "epoch": 1.097966728280961, "grad_norm": 2.40625, "learning_rate": 9.213504003426094e-06, "loss": 1.6125, "step": 1188 }, { "epoch": 1.0988909426987061, "grad_norm": 2.328125, "learning_rate": 9.198264728845332e-06, "loss": 1.6903, "step": 1189 }, { "epoch": 1.099815157116451, "grad_norm": 2.265625, "learning_rate": 9.183027327994519e-06, "loss": 1.7749, "step": 1190 }, { "epoch": 1.100739371534196, "grad_norm": 2.234375, "learning_rate": 9.16779183648488e-06, "loss": 1.6739, "step": 1191 }, { "epoch": 1.1016635859519408, "grad_norm": 2.15625, "learning_rate": 9.152558289923179e-06, "loss": 1.6347, "step": 1192 }, { "epoch": 1.1025878003696858, "grad_norm": 2.3125, "learning_rate": 9.13732672391162e-06, "loss": 1.6656, "step": 1193 }, { "epoch": 1.1035120147874307, "grad_norm": 2.203125, "learning_rate": 9.122097174047805e-06, "loss": 1.7041, "step": 1194 }, { "epoch": 1.1044362292051757, "grad_norm": 2.15625, "learning_rate": 9.106869675924605e-06, "loss": 1.6654, "step": 1195 }, { "epoch": 1.1053604436229205, "grad_norm": 2.296875, "learning_rate": 9.091644265130101e-06, "loss": 1.7383, "step": 1196 }, { "epoch": 1.1062846580406653, "grad_norm": 2.21875, "learning_rate": 9.076420977247493e-06, "loss": 1.7195, "step": 1197 }, { "epoch": 1.1072088724584104, "grad_norm": 2.296875, "learning_rate": 9.061199847855018e-06, "loss": 1.6982, "step": 1198 }, { "epoch": 1.1081330868761552, "grad_norm": 2.3125, "learning_rate": 9.045980912525878e-06, "loss": 1.7115, "step": 1199 }, { "epoch": 1.1090573012939002, "grad_norm": 2.203125, "learning_rate": 9.030764206828146e-06, "loss": 1.7063, "step": 1200 }, { "epoch": 1.109981515711645, "grad_norm": 2.40625, "learning_rate": 9.015549766324668e-06, "loss": 1.7588, "step": 1201 }, { "epoch": 1.11090573012939, "grad_norm": 2.21875, "learning_rate": 9.000337626573014e-06, "loss": 1.7724, "step": 1202 }, { "epoch": 1.111829944547135, "grad_norm": 2.203125, "learning_rate": 8.985127823125365e-06, "loss": 1.7107, "step": 1203 }, { "epoch": 1.11275415896488, "grad_norm": 2.203125, "learning_rate": 8.969920391528459e-06, "loss": 1.7027, "step": 1204 }, { "epoch": 1.1136783733826248, "grad_norm": 2.203125, "learning_rate": 8.954715367323468e-06, "loss": 1.7218, "step": 1205 }, { "epoch": 1.1146025878003696, "grad_norm": 2.171875, "learning_rate": 8.939512786045953e-06, "loss": 1.7096, "step": 1206 }, { "epoch": 1.1155268022181146, "grad_norm": 2.25, "learning_rate": 8.92431268322576e-06, "loss": 1.6417, "step": 1207 }, { "epoch": 1.1164510166358594, "grad_norm": 2.203125, "learning_rate": 8.909115094386943e-06, "loss": 1.6267, "step": 1208 }, { "epoch": 1.1173752310536045, "grad_norm": 2.140625, "learning_rate": 8.893920055047686e-06, "loss": 1.6038, "step": 1209 }, { "epoch": 1.1182994454713493, "grad_norm": 2.390625, "learning_rate": 8.878727600720207e-06, "loss": 1.6731, "step": 1210 }, { "epoch": 1.1192236598890943, "grad_norm": 2.25, "learning_rate": 8.863537766910688e-06, "loss": 1.6885, "step": 1211 }, { "epoch": 1.1201478743068392, "grad_norm": 2.15625, "learning_rate": 8.848350589119178e-06, "loss": 1.6652, "step": 1212 }, { "epoch": 1.1210720887245842, "grad_norm": 2.21875, "learning_rate": 8.833166102839535e-06, "loss": 1.643, "step": 1213 }, { "epoch": 1.121996303142329, "grad_norm": 2.3125, "learning_rate": 8.81798434355931e-06, "loss": 1.7911, "step": 1214 }, { "epoch": 1.122920517560074, "grad_norm": 2.1875, "learning_rate": 8.80280534675969e-06, "loss": 1.6125, "step": 1215 }, { "epoch": 1.1238447319778189, "grad_norm": 2.203125, "learning_rate": 8.787629147915402e-06, "loss": 1.6708, "step": 1216 }, { "epoch": 1.1247689463955637, "grad_norm": 2.1875, "learning_rate": 8.772455782494632e-06, "loss": 1.7758, "step": 1217 }, { "epoch": 1.1256931608133087, "grad_norm": 2.1875, "learning_rate": 8.757285285958954e-06, "loss": 1.657, "step": 1218 }, { "epoch": 1.1266173752310535, "grad_norm": 2.203125, "learning_rate": 8.742117693763229e-06, "loss": 1.7069, "step": 1219 }, { "epoch": 1.1275415896487986, "grad_norm": 2.203125, "learning_rate": 8.726953041355528e-06, "loss": 1.6104, "step": 1220 }, { "epoch": 1.1284658040665434, "grad_norm": 2.21875, "learning_rate": 8.711791364177056e-06, "loss": 1.6719, "step": 1221 }, { "epoch": 1.1293900184842884, "grad_norm": 2.15625, "learning_rate": 8.696632697662065e-06, "loss": 1.6633, "step": 1222 }, { "epoch": 1.1303142329020333, "grad_norm": 2.15625, "learning_rate": 8.681477077237768e-06, "loss": 1.6205, "step": 1223 }, { "epoch": 1.1312384473197783, "grad_norm": 2.21875, "learning_rate": 8.666324538324264e-06, "loss": 1.7377, "step": 1224 }, { "epoch": 1.1321626617375231, "grad_norm": 2.265625, "learning_rate": 8.651175116334444e-06, "loss": 1.6687, "step": 1225 }, { "epoch": 1.133086876155268, "grad_norm": 2.265625, "learning_rate": 8.636028846673917e-06, "loss": 1.7027, "step": 1226 }, { "epoch": 1.134011090573013, "grad_norm": 2.21875, "learning_rate": 8.620885764740925e-06, "loss": 1.7604, "step": 1227 }, { "epoch": 1.1349353049907578, "grad_norm": 2.203125, "learning_rate": 8.605745905926261e-06, "loss": 1.7464, "step": 1228 }, { "epoch": 1.1358595194085028, "grad_norm": 2.21875, "learning_rate": 8.590609305613184e-06, "loss": 1.6775, "step": 1229 }, { "epoch": 1.1367837338262476, "grad_norm": 2.25, "learning_rate": 8.57547599917734e-06, "loss": 1.764, "step": 1230 }, { "epoch": 1.1377079482439927, "grad_norm": 2.09375, "learning_rate": 8.560346021986672e-06, "loss": 1.5902, "step": 1231 }, { "epoch": 1.1386321626617375, "grad_norm": 2.1875, "learning_rate": 8.545219409401338e-06, "loss": 1.6642, "step": 1232 }, { "epoch": 1.1395563770794825, "grad_norm": 2.28125, "learning_rate": 8.530096196773652e-06, "loss": 1.6365, "step": 1233 }, { "epoch": 1.1404805914972274, "grad_norm": 2.140625, "learning_rate": 8.514976419447963e-06, "loss": 1.5889, "step": 1234 }, { "epoch": 1.1414048059149722, "grad_norm": 2.234375, "learning_rate": 8.499860112760602e-06, "loss": 1.6588, "step": 1235 }, { "epoch": 1.1423290203327172, "grad_norm": 2.265625, "learning_rate": 8.484747312039779e-06, "loss": 1.7309, "step": 1236 }, { "epoch": 1.143253234750462, "grad_norm": 2.484375, "learning_rate": 8.469638052605513e-06, "loss": 1.7574, "step": 1237 }, { "epoch": 1.144177449168207, "grad_norm": 2.234375, "learning_rate": 8.454532369769561e-06, "loss": 1.6976, "step": 1238 }, { "epoch": 1.145101663585952, "grad_norm": 2.125, "learning_rate": 8.439430298835305e-06, "loss": 1.5859, "step": 1239 }, { "epoch": 1.146025878003697, "grad_norm": 2.203125, "learning_rate": 8.424331875097688e-06, "loss": 1.7248, "step": 1240 }, { "epoch": 1.1469500924214417, "grad_norm": 2.1875, "learning_rate": 8.40923713384313e-06, "loss": 1.663, "step": 1241 }, { "epoch": 1.1478743068391868, "grad_norm": 2.203125, "learning_rate": 8.394146110349449e-06, "loss": 1.6929, "step": 1242 }, { "epoch": 1.1487985212569316, "grad_norm": 2.203125, "learning_rate": 8.379058839885774e-06, "loss": 1.6407, "step": 1243 }, { "epoch": 1.1497227356746764, "grad_norm": 2.125, "learning_rate": 8.363975357712457e-06, "loss": 1.6776, "step": 1244 }, { "epoch": 1.1506469500924215, "grad_norm": 2.1875, "learning_rate": 8.348895699081e-06, "loss": 1.7499, "step": 1245 }, { "epoch": 1.1515711645101663, "grad_norm": 2.265625, "learning_rate": 8.33381989923397e-06, "loss": 1.6716, "step": 1246 }, { "epoch": 1.1524953789279113, "grad_norm": 2.15625, "learning_rate": 8.31874799340491e-06, "loss": 1.6767, "step": 1247 }, { "epoch": 1.1534195933456561, "grad_norm": 2.1875, "learning_rate": 8.303680016818275e-06, "loss": 1.6544, "step": 1248 }, { "epoch": 1.1543438077634012, "grad_norm": 2.1875, "learning_rate": 8.288616004689321e-06, "loss": 1.6572, "step": 1249 }, { "epoch": 1.155268022181146, "grad_norm": 2.1875, "learning_rate": 8.27355599222405e-06, "loss": 1.7152, "step": 1250 }, { "epoch": 1.156192236598891, "grad_norm": 2.21875, "learning_rate": 8.258500014619112e-06, "loss": 1.6723, "step": 1251 }, { "epoch": 1.1571164510166358, "grad_norm": 2.265625, "learning_rate": 8.243448107061728e-06, "loss": 1.7413, "step": 1252 }, { "epoch": 1.1580406654343807, "grad_norm": 2.1875, "learning_rate": 8.228400304729609e-06, "loss": 1.6629, "step": 1253 }, { "epoch": 1.1589648798521257, "grad_norm": 2.1875, "learning_rate": 8.213356642790867e-06, "loss": 1.7081, "step": 1254 }, { "epoch": 1.1598890942698705, "grad_norm": 2.28125, "learning_rate": 8.198317156403941e-06, "loss": 1.7335, "step": 1255 }, { "epoch": 1.1608133086876156, "grad_norm": 2.171875, "learning_rate": 8.18328188071751e-06, "loss": 1.6918, "step": 1256 }, { "epoch": 1.1617375231053604, "grad_norm": 2.203125, "learning_rate": 8.168250850870412e-06, "loss": 1.7469, "step": 1257 }, { "epoch": 1.1626617375231054, "grad_norm": 2.203125, "learning_rate": 8.153224101991565e-06, "loss": 1.6904, "step": 1258 }, { "epoch": 1.1635859519408502, "grad_norm": 2.1875, "learning_rate": 8.138201669199878e-06, "loss": 1.6588, "step": 1259 }, { "epoch": 1.1645101663585953, "grad_norm": 2.375, "learning_rate": 8.123183587604176e-06, "loss": 1.756, "step": 1260 }, { "epoch": 1.16543438077634, "grad_norm": 2.359375, "learning_rate": 8.108169892303111e-06, "loss": 1.686, "step": 1261 }, { "epoch": 1.166358595194085, "grad_norm": 2.25, "learning_rate": 8.093160618385088e-06, "loss": 1.7059, "step": 1262 }, { "epoch": 1.16728280961183, "grad_norm": 2.171875, "learning_rate": 8.078155800928178e-06, "loss": 1.6089, "step": 1263 }, { "epoch": 1.1682070240295748, "grad_norm": 2.234375, "learning_rate": 8.063155475000037e-06, "loss": 1.6881, "step": 1264 }, { "epoch": 1.1691312384473198, "grad_norm": 2.21875, "learning_rate": 8.048159675657819e-06, "loss": 1.7123, "step": 1265 }, { "epoch": 1.1700554528650646, "grad_norm": 2.234375, "learning_rate": 8.033168437948103e-06, "loss": 1.7298, "step": 1266 }, { "epoch": 1.1709796672828097, "grad_norm": 2.1875, "learning_rate": 8.01818179690681e-06, "loss": 1.7091, "step": 1267 }, { "epoch": 1.1719038817005545, "grad_norm": 2.25, "learning_rate": 8.003199787559112e-06, "loss": 1.6816, "step": 1268 }, { "epoch": 1.1728280961182995, "grad_norm": 2.234375, "learning_rate": 7.988222444919364e-06, "loss": 1.7283, "step": 1269 }, { "epoch": 1.1737523105360443, "grad_norm": 2.265625, "learning_rate": 7.973249803991006e-06, "loss": 1.7014, "step": 1270 }, { "epoch": 1.1746765249537892, "grad_norm": 2.265625, "learning_rate": 7.958281899766487e-06, "loss": 1.7745, "step": 1271 }, { "epoch": 1.1756007393715342, "grad_norm": 2.265625, "learning_rate": 7.943318767227205e-06, "loss": 1.6489, "step": 1272 }, { "epoch": 1.1765249537892792, "grad_norm": 2.21875, "learning_rate": 7.928360441343387e-06, "loss": 1.7413, "step": 1273 }, { "epoch": 1.177449168207024, "grad_norm": 2.15625, "learning_rate": 7.913406957074038e-06, "loss": 1.6366, "step": 1274 }, { "epoch": 1.1783733826247689, "grad_norm": 2.25, "learning_rate": 7.898458349366832e-06, "loss": 1.7147, "step": 1275 }, { "epoch": 1.179297597042514, "grad_norm": 2.15625, "learning_rate": 7.883514653158061e-06, "loss": 1.6752, "step": 1276 }, { "epoch": 1.1802218114602587, "grad_norm": 2.234375, "learning_rate": 7.868575903372539e-06, "loss": 1.7498, "step": 1277 }, { "epoch": 1.1811460258780038, "grad_norm": 2.171875, "learning_rate": 7.853642134923511e-06, "loss": 1.6209, "step": 1278 }, { "epoch": 1.1820702402957486, "grad_norm": 2.21875, "learning_rate": 7.838713382712583e-06, "loss": 1.7511, "step": 1279 }, { "epoch": 1.1829944547134936, "grad_norm": 2.203125, "learning_rate": 7.823789681629642e-06, "loss": 1.6565, "step": 1280 }, { "epoch": 1.1839186691312384, "grad_norm": 2.21875, "learning_rate": 7.808871066552756e-06, "loss": 1.761, "step": 1281 }, { "epoch": 1.1848428835489835, "grad_norm": 2.21875, "learning_rate": 7.793957572348132e-06, "loss": 1.7429, "step": 1282 }, { "epoch": 1.1857670979667283, "grad_norm": 2.375, "learning_rate": 7.779049233869985e-06, "loss": 1.6138, "step": 1283 }, { "epoch": 1.1866913123844731, "grad_norm": 2.21875, "learning_rate": 7.76414608596049e-06, "loss": 1.6289, "step": 1284 }, { "epoch": 1.1876155268022182, "grad_norm": 2.171875, "learning_rate": 7.749248163449693e-06, "loss": 1.7154, "step": 1285 }, { "epoch": 1.188539741219963, "grad_norm": 2.265625, "learning_rate": 7.734355501155422e-06, "loss": 1.6469, "step": 1286 }, { "epoch": 1.189463955637708, "grad_norm": 2.15625, "learning_rate": 7.719468133883219e-06, "loss": 1.7062, "step": 1287 }, { "epoch": 1.1903881700554528, "grad_norm": 2.140625, "learning_rate": 7.704586096426247e-06, "loss": 1.651, "step": 1288 }, { "epoch": 1.1913123844731979, "grad_norm": 2.203125, "learning_rate": 7.68970942356521e-06, "loss": 1.7599, "step": 1289 }, { "epoch": 1.1922365988909427, "grad_norm": 2.21875, "learning_rate": 7.674838150068275e-06, "loss": 1.7037, "step": 1290 }, { "epoch": 1.1931608133086877, "grad_norm": 2.1875, "learning_rate": 7.659972310691e-06, "loss": 1.6401, "step": 1291 }, { "epoch": 1.1940850277264325, "grad_norm": 2.234375, "learning_rate": 7.64511194017623e-06, "loss": 1.662, "step": 1292 }, { "epoch": 1.1950092421441774, "grad_norm": 2.1875, "learning_rate": 7.630257073254035e-06, "loss": 1.6613, "step": 1293 }, { "epoch": 1.1959334565619224, "grad_norm": 2.21875, "learning_rate": 7.615407744641618e-06, "loss": 1.7004, "step": 1294 }, { "epoch": 1.1968576709796672, "grad_norm": 2.15625, "learning_rate": 7.600563989043247e-06, "loss": 1.6239, "step": 1295 }, { "epoch": 1.1977818853974123, "grad_norm": 2.140625, "learning_rate": 7.5857258411501596e-06, "loss": 1.6252, "step": 1296 }, { "epoch": 1.198706099815157, "grad_norm": 2.234375, "learning_rate": 7.570893335640488e-06, "loss": 1.6812, "step": 1297 }, { "epoch": 1.1996303142329021, "grad_norm": 2.234375, "learning_rate": 7.556066507179177e-06, "loss": 1.7231, "step": 1298 }, { "epoch": 1.200554528650647, "grad_norm": 2.140625, "learning_rate": 7.541245390417906e-06, "loss": 1.7012, "step": 1299 }, { "epoch": 1.201478743068392, "grad_norm": 2.15625, "learning_rate": 7.526430019995001e-06, "loss": 1.6178, "step": 1300 }, { "epoch": 1.2024029574861368, "grad_norm": 2.1875, "learning_rate": 7.511620430535367e-06, "loss": 1.6295, "step": 1301 }, { "epoch": 1.2033271719038816, "grad_norm": 2.21875, "learning_rate": 7.496816656650389e-06, "loss": 1.6817, "step": 1302 }, { "epoch": 1.2042513863216266, "grad_norm": 2.15625, "learning_rate": 7.482018732937868e-06, "loss": 1.6994, "step": 1303 }, { "epoch": 1.2051756007393715, "grad_norm": 2.296875, "learning_rate": 7.467226693981926e-06, "loss": 1.7356, "step": 1304 }, { "epoch": 1.2060998151571165, "grad_norm": 2.1875, "learning_rate": 7.452440574352932e-06, "loss": 1.7073, "step": 1305 }, { "epoch": 1.2070240295748613, "grad_norm": 2.21875, "learning_rate": 7.437660408607432e-06, "loss": 1.7186, "step": 1306 }, { "epoch": 1.2079482439926064, "grad_norm": 2.1875, "learning_rate": 7.422886231288042e-06, "loss": 1.6317, "step": 1307 }, { "epoch": 1.2088724584103512, "grad_norm": 2.1875, "learning_rate": 7.408118076923394e-06, "loss": 1.6258, "step": 1308 }, { "epoch": 1.2097966728280962, "grad_norm": 2.203125, "learning_rate": 7.393355980028039e-06, "loss": 1.7462, "step": 1309 }, { "epoch": 1.210720887245841, "grad_norm": 2.109375, "learning_rate": 7.378599975102361e-06, "loss": 1.6689, "step": 1310 }, { "epoch": 1.2116451016635859, "grad_norm": 2.140625, "learning_rate": 7.36385009663253e-06, "loss": 1.6948, "step": 1311 }, { "epoch": 1.212569316081331, "grad_norm": 2.140625, "learning_rate": 7.349106379090382e-06, "loss": 1.6806, "step": 1312 }, { "epoch": 1.2134935304990757, "grad_norm": 2.1875, "learning_rate": 7.334368856933356e-06, "loss": 1.622, "step": 1313 }, { "epoch": 1.2144177449168208, "grad_norm": 2.265625, "learning_rate": 7.319637564604412e-06, "loss": 1.7048, "step": 1314 }, { "epoch": 1.2153419593345656, "grad_norm": 2.25, "learning_rate": 7.304912536531945e-06, "loss": 1.7063, "step": 1315 }, { "epoch": 1.2162661737523106, "grad_norm": 2.21875, "learning_rate": 7.290193807129728e-06, "loss": 1.6709, "step": 1316 }, { "epoch": 1.2171903881700554, "grad_norm": 2.21875, "learning_rate": 7.275481410796799e-06, "loss": 1.6511, "step": 1317 }, { "epoch": 1.2181146025878005, "grad_norm": 2.1875, "learning_rate": 7.260775381917392e-06, "loss": 1.6339, "step": 1318 }, { "epoch": 1.2190388170055453, "grad_norm": 2.3125, "learning_rate": 7.246075754860868e-06, "loss": 1.8067, "step": 1319 }, { "epoch": 1.21996303142329, "grad_norm": 2.234375, "learning_rate": 7.23138256398162e-06, "loss": 1.6221, "step": 1320 }, { "epoch": 1.2208872458410351, "grad_norm": 2.171875, "learning_rate": 7.216695843619013e-06, "loss": 1.5519, "step": 1321 }, { "epoch": 1.22181146025878, "grad_norm": 2.1875, "learning_rate": 7.20201562809727e-06, "loss": 1.6872, "step": 1322 }, { "epoch": 1.222735674676525, "grad_norm": 2.21875, "learning_rate": 7.187341951725424e-06, "loss": 1.6383, "step": 1323 }, { "epoch": 1.2236598890942698, "grad_norm": 2.25, "learning_rate": 7.172674848797218e-06, "loss": 1.6802, "step": 1324 }, { "epoch": 1.2245841035120149, "grad_norm": 2.1875, "learning_rate": 7.1580143535910475e-06, "loss": 1.6682, "step": 1325 }, { "epoch": 1.2255083179297597, "grad_norm": 2.1875, "learning_rate": 7.143360500369845e-06, "loss": 1.7064, "step": 1326 }, { "epoch": 1.2264325323475047, "grad_norm": 2.234375, "learning_rate": 7.128713323381032e-06, "loss": 1.5928, "step": 1327 }, { "epoch": 1.2273567467652495, "grad_norm": 2.203125, "learning_rate": 7.114072856856424e-06, "loss": 1.7363, "step": 1328 }, { "epoch": 1.2282809611829943, "grad_norm": 2.203125, "learning_rate": 7.099439135012154e-06, "loss": 1.7233, "step": 1329 }, { "epoch": 1.2292051756007394, "grad_norm": 2.1875, "learning_rate": 7.084812192048593e-06, "loss": 1.7236, "step": 1330 }, { "epoch": 1.2301293900184842, "grad_norm": 2.125, "learning_rate": 7.070192062150269e-06, "loss": 1.5934, "step": 1331 }, { "epoch": 1.2310536044362292, "grad_norm": 2.234375, "learning_rate": 7.055578779485784e-06, "loss": 1.621, "step": 1332 }, { "epoch": 1.231977818853974, "grad_norm": 2.1875, "learning_rate": 7.040972378207739e-06, "loss": 1.7263, "step": 1333 }, { "epoch": 1.232902033271719, "grad_norm": 2.234375, "learning_rate": 7.026372892452653e-06, "loss": 1.7424, "step": 1334 }, { "epoch": 1.233826247689464, "grad_norm": 2.140625, "learning_rate": 7.011780356340888e-06, "loss": 1.6029, "step": 1335 }, { "epoch": 1.234750462107209, "grad_norm": 2.25, "learning_rate": 6.997194803976556e-06, "loss": 1.693, "step": 1336 }, { "epoch": 1.2356746765249538, "grad_norm": 2.21875, "learning_rate": 6.982616269447449e-06, "loss": 1.6927, "step": 1337 }, { "epoch": 1.2365988909426986, "grad_norm": 2.171875, "learning_rate": 6.9680447868249615e-06, "loss": 1.7218, "step": 1338 }, { "epoch": 1.2375231053604436, "grad_norm": 2.359375, "learning_rate": 6.953480390164001e-06, "loss": 1.5644, "step": 1339 }, { "epoch": 1.2384473197781884, "grad_norm": 2.265625, "learning_rate": 6.9389231135029225e-06, "loss": 1.6842, "step": 1340 }, { "epoch": 1.2393715341959335, "grad_norm": 2.1875, "learning_rate": 6.924372990863434e-06, "loss": 1.6772, "step": 1341 }, { "epoch": 1.2402957486136783, "grad_norm": 2.21875, "learning_rate": 6.909830056250527e-06, "loss": 1.7295, "step": 1342 }, { "epoch": 1.2412199630314233, "grad_norm": 2.171875, "learning_rate": 6.8952943436523934e-06, "loss": 1.7222, "step": 1343 }, { "epoch": 1.2421441774491682, "grad_norm": 2.171875, "learning_rate": 6.880765887040344e-06, "loss": 1.6912, "step": 1344 }, { "epoch": 1.2430683918669132, "grad_norm": 2.21875, "learning_rate": 6.866244720368738e-06, "loss": 1.8309, "step": 1345 }, { "epoch": 1.243992606284658, "grad_norm": 2.25, "learning_rate": 6.8517308775748915e-06, "loss": 1.6578, "step": 1346 }, { "epoch": 1.2449168207024028, "grad_norm": 2.1875, "learning_rate": 6.837224392579007e-06, "loss": 1.6651, "step": 1347 }, { "epoch": 1.2458410351201479, "grad_norm": 2.296875, "learning_rate": 6.822725299284091e-06, "loss": 1.6297, "step": 1348 }, { "epoch": 1.2467652495378927, "grad_norm": 2.15625, "learning_rate": 6.808233631575867e-06, "loss": 1.6741, "step": 1349 }, { "epoch": 1.2476894639556377, "grad_norm": 2.203125, "learning_rate": 6.793749423322719e-06, "loss": 1.7022, "step": 1350 }, { "epoch": 1.2486136783733826, "grad_norm": 2.3125, "learning_rate": 6.779272708375588e-06, "loss": 1.676, "step": 1351 }, { "epoch": 1.2495378927911276, "grad_norm": 2.125, "learning_rate": 6.764803520567905e-06, "loss": 1.6752, "step": 1352 }, { "epoch": 1.2504621072088724, "grad_norm": 2.21875, "learning_rate": 6.750341893715506e-06, "loss": 1.6942, "step": 1353 }, { "epoch": 1.2513863216266174, "grad_norm": 2.171875, "learning_rate": 6.735887861616555e-06, "loss": 1.641, "step": 1354 }, { "epoch": 1.2523105360443623, "grad_norm": 2.203125, "learning_rate": 6.721441458051481e-06, "loss": 1.6589, "step": 1355 }, { "epoch": 1.253234750462107, "grad_norm": 2.171875, "learning_rate": 6.70700271678287e-06, "loss": 1.6802, "step": 1356 }, { "epoch": 1.2541589648798521, "grad_norm": 2.21875, "learning_rate": 6.692571671555399e-06, "loss": 1.6807, "step": 1357 }, { "epoch": 1.2550831792975972, "grad_norm": 2.15625, "learning_rate": 6.678148356095768e-06, "loss": 1.6753, "step": 1358 }, { "epoch": 1.256007393715342, "grad_norm": 2.375, "learning_rate": 6.663732804112604e-06, "loss": 1.6712, "step": 1359 }, { "epoch": 1.2569316081330868, "grad_norm": 2.171875, "learning_rate": 6.649325049296401e-06, "loss": 1.7105, "step": 1360 }, { "epoch": 1.2578558225508318, "grad_norm": 2.234375, "learning_rate": 6.634925125319418e-06, "loss": 1.6819, "step": 1361 }, { "epoch": 1.2587800369685767, "grad_norm": 2.171875, "learning_rate": 6.620533065835617e-06, "loss": 1.6486, "step": 1362 }, { "epoch": 1.2597042513863217, "grad_norm": 2.109375, "learning_rate": 6.606148904480577e-06, "loss": 1.5767, "step": 1363 }, { "epoch": 1.2606284658040665, "grad_norm": 2.21875, "learning_rate": 6.591772674871434e-06, "loss": 1.66, "step": 1364 }, { "epoch": 1.2615526802218113, "grad_norm": 2.1875, "learning_rate": 6.577404410606765e-06, "loss": 1.7251, "step": 1365 }, { "epoch": 1.2624768946395564, "grad_norm": 2.171875, "learning_rate": 6.563044145266543e-06, "loss": 1.742, "step": 1366 }, { "epoch": 1.2634011090573014, "grad_norm": 2.15625, "learning_rate": 6.548691912412044e-06, "loss": 1.6591, "step": 1367 }, { "epoch": 1.2643253234750462, "grad_norm": 2.15625, "learning_rate": 6.534347745585772e-06, "loss": 1.6356, "step": 1368 }, { "epoch": 1.265249537892791, "grad_norm": 2.140625, "learning_rate": 6.520011678311382e-06, "loss": 1.7256, "step": 1369 }, { "epoch": 1.266173752310536, "grad_norm": 2.328125, "learning_rate": 6.505683744093597e-06, "loss": 1.7519, "step": 1370 }, { "epoch": 1.267097966728281, "grad_norm": 2.171875, "learning_rate": 6.491363976418131e-06, "loss": 1.6225, "step": 1371 }, { "epoch": 1.268022181146026, "grad_norm": 2.15625, "learning_rate": 6.477052408751616e-06, "loss": 1.6605, "step": 1372 }, { "epoch": 1.2689463955637708, "grad_norm": 2.203125, "learning_rate": 6.462749074541515e-06, "loss": 1.693, "step": 1373 }, { "epoch": 1.2698706099815156, "grad_norm": 2.125, "learning_rate": 6.448454007216054e-06, "loss": 1.6445, "step": 1374 }, { "epoch": 1.2707948243992606, "grad_norm": 2.125, "learning_rate": 6.434167240184135e-06, "loss": 1.6697, "step": 1375 }, { "epoch": 1.2717190388170057, "grad_norm": 2.203125, "learning_rate": 6.419888806835261e-06, "loss": 1.6962, "step": 1376 }, { "epoch": 1.2726432532347505, "grad_norm": 2.15625, "learning_rate": 6.405618740539463e-06, "loss": 1.632, "step": 1377 }, { "epoch": 1.2735674676524953, "grad_norm": 2.265625, "learning_rate": 6.391357074647209e-06, "loss": 1.634, "step": 1378 }, { "epoch": 1.2744916820702403, "grad_norm": 2.265625, "learning_rate": 6.377103842489344e-06, "loss": 1.6808, "step": 1379 }, { "epoch": 1.2754158964879851, "grad_norm": 2.25, "learning_rate": 6.362859077376997e-06, "loss": 1.659, "step": 1380 }, { "epoch": 1.2763401109057302, "grad_norm": 2.21875, "learning_rate": 6.348622812601508e-06, "loss": 1.7258, "step": 1381 }, { "epoch": 1.277264325323475, "grad_norm": 2.125, "learning_rate": 6.334395081434357e-06, "loss": 1.6225, "step": 1382 }, { "epoch": 1.27818853974122, "grad_norm": 2.265625, "learning_rate": 6.320175917127069e-06, "loss": 1.638, "step": 1383 }, { "epoch": 1.2791127541589649, "grad_norm": 2.125, "learning_rate": 6.305965352911162e-06, "loss": 1.6406, "step": 1384 }, { "epoch": 1.28003696857671, "grad_norm": 2.109375, "learning_rate": 6.291763421998043e-06, "loss": 1.6966, "step": 1385 }, { "epoch": 1.2809611829944547, "grad_norm": 2.21875, "learning_rate": 6.277570157578947e-06, "loss": 1.7182, "step": 1386 }, { "epoch": 1.2818853974121995, "grad_norm": 2.171875, "learning_rate": 6.263385592824858e-06, "loss": 1.6446, "step": 1387 }, { "epoch": 1.2828096118299446, "grad_norm": 2.1875, "learning_rate": 6.249209760886413e-06, "loss": 1.6408, "step": 1388 }, { "epoch": 1.2837338262476894, "grad_norm": 2.15625, "learning_rate": 6.2350426948938625e-06, "loss": 1.5935, "step": 1389 }, { "epoch": 1.2846580406654344, "grad_norm": 2.1875, "learning_rate": 6.220884427956953e-06, "loss": 1.6687, "step": 1390 }, { "epoch": 1.2855822550831792, "grad_norm": 2.09375, "learning_rate": 6.206734993164875e-06, "loss": 1.6454, "step": 1391 }, { "epoch": 1.2865064695009243, "grad_norm": 2.234375, "learning_rate": 6.192594423586171e-06, "loss": 1.6601, "step": 1392 }, { "epoch": 1.287430683918669, "grad_norm": 2.234375, "learning_rate": 6.178462752268666e-06, "loss": 1.7254, "step": 1393 }, { "epoch": 1.2883548983364141, "grad_norm": 2.171875, "learning_rate": 6.164340012239397e-06, "loss": 1.683, "step": 1394 }, { "epoch": 1.289279112754159, "grad_norm": 2.171875, "learning_rate": 6.150226236504523e-06, "loss": 1.668, "step": 1395 }, { "epoch": 1.2902033271719038, "grad_norm": 2.109375, "learning_rate": 6.136121458049247e-06, "loss": 1.6105, "step": 1396 }, { "epoch": 1.2911275415896488, "grad_norm": 2.15625, "learning_rate": 6.122025709837749e-06, "loss": 1.6908, "step": 1397 }, { "epoch": 1.2920517560073936, "grad_norm": 2.140625, "learning_rate": 6.107939024813101e-06, "loss": 1.6723, "step": 1398 }, { "epoch": 1.2929759704251387, "grad_norm": 2.171875, "learning_rate": 6.093861435897208e-06, "loss": 1.7832, "step": 1399 }, { "epoch": 1.2939001848428835, "grad_norm": 2.234375, "learning_rate": 6.079792975990699e-06, "loss": 1.7412, "step": 1400 }, { "epoch": 1.2948243992606285, "grad_norm": 2.25, "learning_rate": 6.0657336779728735e-06, "loss": 1.7124, "step": 1401 }, { "epoch": 1.2957486136783734, "grad_norm": 2.234375, "learning_rate": 6.051683574701617e-06, "loss": 1.7146, "step": 1402 }, { "epoch": 1.2966728280961184, "grad_norm": 2.203125, "learning_rate": 6.037642699013338e-06, "loss": 1.644, "step": 1403 }, { "epoch": 1.2975970425138632, "grad_norm": 2.21875, "learning_rate": 6.02361108372286e-06, "loss": 1.7089, "step": 1404 }, { "epoch": 1.298521256931608, "grad_norm": 2.1875, "learning_rate": 6.009588761623379e-06, "loss": 1.7159, "step": 1405 }, { "epoch": 1.299445471349353, "grad_norm": 2.140625, "learning_rate": 5.995575765486368e-06, "loss": 1.6629, "step": 1406 }, { "epoch": 1.300369685767098, "grad_norm": 2.203125, "learning_rate": 5.981572128061497e-06, "loss": 1.7625, "step": 1407 }, { "epoch": 1.301293900184843, "grad_norm": 2.203125, "learning_rate": 5.9675778820765754e-06, "loss": 1.6923, "step": 1408 }, { "epoch": 1.3022181146025877, "grad_norm": 2.234375, "learning_rate": 5.953593060237457e-06, "loss": 1.7193, "step": 1409 }, { "epoch": 1.3031423290203328, "grad_norm": 2.203125, "learning_rate": 5.939617695227974e-06, "loss": 1.5905, "step": 1410 }, { "epoch": 1.3040665434380776, "grad_norm": 2.234375, "learning_rate": 5.925651819709852e-06, "loss": 1.7245, "step": 1411 }, { "epoch": 1.3049907578558226, "grad_norm": 2.125, "learning_rate": 5.911695466322641e-06, "loss": 1.6268, "step": 1412 }, { "epoch": 1.3059149722735675, "grad_norm": 2.203125, "learning_rate": 5.897748667683643e-06, "loss": 1.6749, "step": 1413 }, { "epoch": 1.3068391866913123, "grad_norm": 2.140625, "learning_rate": 5.883811456387821e-06, "loss": 1.6378, "step": 1414 }, { "epoch": 1.3077634011090573, "grad_norm": 2.171875, "learning_rate": 5.8698838650077365e-06, "loss": 1.6425, "step": 1415 }, { "epoch": 1.3086876155268024, "grad_norm": 2.15625, "learning_rate": 5.855965926093466e-06, "loss": 1.7253, "step": 1416 }, { "epoch": 1.3096118299445472, "grad_norm": 2.1875, "learning_rate": 5.8420576721725255e-06, "loss": 1.7506, "step": 1417 }, { "epoch": 1.310536044362292, "grad_norm": 2.171875, "learning_rate": 5.828159135749802e-06, "loss": 1.6706, "step": 1418 }, { "epoch": 1.311460258780037, "grad_norm": 2.15625, "learning_rate": 5.8142703493074716e-06, "loss": 1.6328, "step": 1419 }, { "epoch": 1.3123844731977818, "grad_norm": 2.15625, "learning_rate": 5.800391345304915e-06, "loss": 1.7046, "step": 1420 }, { "epoch": 1.3133086876155269, "grad_norm": 2.1875, "learning_rate": 5.78652215617866e-06, "loss": 1.7224, "step": 1421 }, { "epoch": 1.3142329020332717, "grad_norm": 2.1875, "learning_rate": 5.772662814342292e-06, "loss": 1.788, "step": 1422 }, { "epoch": 1.3151571164510165, "grad_norm": 2.125, "learning_rate": 5.758813352186379e-06, "loss": 1.6454, "step": 1423 }, { "epoch": 1.3160813308687616, "grad_norm": 2.15625, "learning_rate": 5.744973802078409e-06, "loss": 1.6167, "step": 1424 }, { "epoch": 1.3170055452865066, "grad_norm": 2.171875, "learning_rate": 5.731144196362696e-06, "loss": 1.7531, "step": 1425 }, { "epoch": 1.3179297597042514, "grad_norm": 2.234375, "learning_rate": 5.717324567360314e-06, "loss": 1.7143, "step": 1426 }, { "epoch": 1.3188539741219962, "grad_norm": 2.15625, "learning_rate": 5.703514947369021e-06, "loss": 1.6752, "step": 1427 }, { "epoch": 1.3197781885397413, "grad_norm": 2.171875, "learning_rate": 5.689715368663191e-06, "loss": 1.6187, "step": 1428 }, { "epoch": 1.320702402957486, "grad_norm": 2.09375, "learning_rate": 5.675925863493721e-06, "loss": 1.6027, "step": 1429 }, { "epoch": 1.3216266173752311, "grad_norm": 2.1875, "learning_rate": 5.662146464087966e-06, "loss": 1.7121, "step": 1430 }, { "epoch": 1.322550831792976, "grad_norm": 2.1875, "learning_rate": 5.648377202649664e-06, "loss": 1.6924, "step": 1431 }, { "epoch": 1.3234750462107208, "grad_norm": 2.140625, "learning_rate": 5.6346181113588645e-06, "loss": 1.6083, "step": 1432 }, { "epoch": 1.3243992606284658, "grad_norm": 2.171875, "learning_rate": 5.6208692223718434e-06, "loss": 1.7109, "step": 1433 }, { "epoch": 1.3253234750462108, "grad_norm": 2.140625, "learning_rate": 5.60713056782103e-06, "loss": 1.7177, "step": 1434 }, { "epoch": 1.3262476894639557, "grad_norm": 2.15625, "learning_rate": 5.593402179814944e-06, "loss": 1.6718, "step": 1435 }, { "epoch": 1.3271719038817005, "grad_norm": 2.203125, "learning_rate": 5.579684090438099e-06, "loss": 1.7007, "step": 1436 }, { "epoch": 1.3280961182994455, "grad_norm": 2.203125, "learning_rate": 5.565976331750955e-06, "loss": 1.7268, "step": 1437 }, { "epoch": 1.3290203327171903, "grad_norm": 2.234375, "learning_rate": 5.552278935789816e-06, "loss": 1.6633, "step": 1438 }, { "epoch": 1.3299445471349354, "grad_norm": 2.171875, "learning_rate": 5.538591934566771e-06, "loss": 1.6503, "step": 1439 }, { "epoch": 1.3308687615526802, "grad_norm": 2.15625, "learning_rate": 5.524915360069617e-06, "loss": 1.7402, "step": 1440 }, { "epoch": 1.331792975970425, "grad_norm": 2.140625, "learning_rate": 5.511249244261781e-06, "loss": 1.6785, "step": 1441 }, { "epoch": 1.33271719038817, "grad_norm": 2.234375, "learning_rate": 5.4975936190822456e-06, "loss": 1.7743, "step": 1442 }, { "epoch": 1.333641404805915, "grad_norm": 2.203125, "learning_rate": 5.4839485164454796e-06, "loss": 1.7053, "step": 1443 }, { "epoch": 1.33456561922366, "grad_norm": 2.15625, "learning_rate": 5.4703139682413585e-06, "loss": 1.7412, "step": 1444 }, { "epoch": 1.3354898336414047, "grad_norm": 2.15625, "learning_rate": 5.456690006335091e-06, "loss": 1.6954, "step": 1445 }, { "epoch": 1.3364140480591498, "grad_norm": 2.171875, "learning_rate": 5.443076662567136e-06, "loss": 1.7228, "step": 1446 }, { "epoch": 1.3373382624768946, "grad_norm": 2.21875, "learning_rate": 5.429473968753157e-06, "loss": 1.7219, "step": 1447 }, { "epoch": 1.3382624768946396, "grad_norm": 2.125, "learning_rate": 5.415881956683911e-06, "loss": 1.6544, "step": 1448 }, { "epoch": 1.3391866913123844, "grad_norm": 2.140625, "learning_rate": 5.402300658125197e-06, "loss": 1.6401, "step": 1449 }, { "epoch": 1.3401109057301293, "grad_norm": 2.171875, "learning_rate": 5.3887301048177685e-06, "loss": 1.7249, "step": 1450 }, { "epoch": 1.3410351201478743, "grad_norm": 2.15625, "learning_rate": 5.375170328477271e-06, "loss": 1.6682, "step": 1451 }, { "epoch": 1.3419593345656193, "grad_norm": 2.171875, "learning_rate": 5.361621360794169e-06, "loss": 1.6601, "step": 1452 }, { "epoch": 1.3428835489833642, "grad_norm": 2.21875, "learning_rate": 5.3480832334336615e-06, "loss": 1.7277, "step": 1453 }, { "epoch": 1.343807763401109, "grad_norm": 2.203125, "learning_rate": 5.334555978035609e-06, "loss": 1.6536, "step": 1454 }, { "epoch": 1.344731977818854, "grad_norm": 2.125, "learning_rate": 5.321039626214468e-06, "loss": 1.6839, "step": 1455 }, { "epoch": 1.3456561922365988, "grad_norm": 2.171875, "learning_rate": 5.307534209559206e-06, "loss": 1.7224, "step": 1456 }, { "epoch": 1.3465804066543439, "grad_norm": 2.21875, "learning_rate": 5.2940397596332515e-06, "loss": 1.7435, "step": 1457 }, { "epoch": 1.3475046210720887, "grad_norm": 2.234375, "learning_rate": 5.280556307974376e-06, "loss": 1.7032, "step": 1458 }, { "epoch": 1.3484288354898337, "grad_norm": 2.25, "learning_rate": 5.267083886094668e-06, "loss": 1.8032, "step": 1459 }, { "epoch": 1.3493530499075785, "grad_norm": 2.203125, "learning_rate": 5.253622525480429e-06, "loss": 1.721, "step": 1460 }, { "epoch": 1.3502772643253236, "grad_norm": 2.140625, "learning_rate": 5.240172257592109e-06, "loss": 1.6883, "step": 1461 }, { "epoch": 1.3512014787430684, "grad_norm": 2.140625, "learning_rate": 5.226733113864242e-06, "loss": 1.6391, "step": 1462 }, { "epoch": 1.3521256931608132, "grad_norm": 2.296875, "learning_rate": 5.213305125705354e-06, "loss": 1.6023, "step": 1463 }, { "epoch": 1.3530499075785583, "grad_norm": 2.171875, "learning_rate": 5.199888324497907e-06, "loss": 1.7731, "step": 1464 }, { "epoch": 1.353974121996303, "grad_norm": 2.1875, "learning_rate": 5.18648274159821e-06, "loss": 1.6239, "step": 1465 }, { "epoch": 1.354898336414048, "grad_norm": 2.21875, "learning_rate": 5.17308840833636e-06, "loss": 1.6655, "step": 1466 }, { "epoch": 1.355822550831793, "grad_norm": 2.09375, "learning_rate": 5.159705356016158e-06, "loss": 1.6086, "step": 1467 }, { "epoch": 1.356746765249538, "grad_norm": 2.171875, "learning_rate": 5.146333615915047e-06, "loss": 1.7517, "step": 1468 }, { "epoch": 1.3576709796672828, "grad_norm": 2.171875, "learning_rate": 5.132973219284023e-06, "loss": 1.735, "step": 1469 }, { "epoch": 1.3585951940850278, "grad_norm": 2.140625, "learning_rate": 5.119624197347581e-06, "loss": 1.7187, "step": 1470 }, { "epoch": 1.3595194085027726, "grad_norm": 2.09375, "learning_rate": 5.106286581303623e-06, "loss": 1.6843, "step": 1471 }, { "epoch": 1.3604436229205175, "grad_norm": 2.078125, "learning_rate": 5.092960402323404e-06, "loss": 1.6426, "step": 1472 }, { "epoch": 1.3613678373382625, "grad_norm": 2.1875, "learning_rate": 5.079645691551445e-06, "loss": 1.6992, "step": 1473 }, { "epoch": 1.3622920517560073, "grad_norm": 2.203125, "learning_rate": 5.06634248010546e-06, "loss": 1.6783, "step": 1474 }, { "epoch": 1.3632162661737524, "grad_norm": 2.25, "learning_rate": 5.0530507990762955e-06, "loss": 1.6866, "step": 1475 }, { "epoch": 1.3641404805914972, "grad_norm": 2.140625, "learning_rate": 5.039770679527844e-06, "loss": 1.6912, "step": 1476 }, { "epoch": 1.3650646950092422, "grad_norm": 2.171875, "learning_rate": 5.026502152496986e-06, "loss": 1.7571, "step": 1477 }, { "epoch": 1.365988909426987, "grad_norm": 2.1875, "learning_rate": 5.013245248993499e-06, "loss": 1.6757, "step": 1478 }, { "epoch": 1.366913123844732, "grad_norm": 2.1875, "learning_rate": 5.000000000000003e-06, "loss": 1.6955, "step": 1479 }, { "epoch": 1.3678373382624769, "grad_norm": 2.296875, "learning_rate": 4.986766436471873e-06, "loss": 1.6643, "step": 1480 }, { "epoch": 1.3687615526802217, "grad_norm": 2.140625, "learning_rate": 4.973544589337186e-06, "loss": 1.6371, "step": 1481 }, { "epoch": 1.3696857670979667, "grad_norm": 2.203125, "learning_rate": 4.960334489496626e-06, "loss": 1.6576, "step": 1482 }, { "epoch": 1.3706099815157118, "grad_norm": 2.234375, "learning_rate": 4.947136167823428e-06, "loss": 1.7847, "step": 1483 }, { "epoch": 1.3715341959334566, "grad_norm": 2.171875, "learning_rate": 4.933949655163295e-06, "loss": 1.7052, "step": 1484 }, { "epoch": 1.3724584103512014, "grad_norm": 2.109375, "learning_rate": 4.920774982334335e-06, "loss": 1.6767, "step": 1485 }, { "epoch": 1.3733826247689465, "grad_norm": 2.1875, "learning_rate": 4.907612180126986e-06, "loss": 1.6474, "step": 1486 }, { "epoch": 1.3743068391866913, "grad_norm": 2.171875, "learning_rate": 4.89446127930394e-06, "loss": 1.768, "step": 1487 }, { "epoch": 1.3752310536044363, "grad_norm": 2.234375, "learning_rate": 4.881322310600079e-06, "loss": 1.6284, "step": 1488 }, { "epoch": 1.3761552680221811, "grad_norm": 2.140625, "learning_rate": 4.868195304722391e-06, "loss": 1.6341, "step": 1489 }, { "epoch": 1.377079482439926, "grad_norm": 2.15625, "learning_rate": 4.855080292349913e-06, "loss": 1.6644, "step": 1490 }, { "epoch": 1.378003696857671, "grad_norm": 2.140625, "learning_rate": 4.841977304133653e-06, "loss": 1.7202, "step": 1491 }, { "epoch": 1.378927911275416, "grad_norm": 2.265625, "learning_rate": 4.828886370696511e-06, "loss": 1.7045, "step": 1492 }, { "epoch": 1.3798521256931608, "grad_norm": 2.1875, "learning_rate": 4.81580752263322e-06, "loss": 1.6782, "step": 1493 }, { "epoch": 1.3807763401109057, "grad_norm": 2.21875, "learning_rate": 4.802740790510258e-06, "loss": 1.772, "step": 1494 }, { "epoch": 1.3817005545286507, "grad_norm": 2.328125, "learning_rate": 4.789686204865797e-06, "loss": 1.6978, "step": 1495 }, { "epoch": 1.3826247689463955, "grad_norm": 2.15625, "learning_rate": 4.776643796209621e-06, "loss": 1.6673, "step": 1496 }, { "epoch": 1.3835489833641406, "grad_norm": 2.15625, "learning_rate": 4.7636135950230544e-06, "loss": 1.6506, "step": 1497 }, { "epoch": 1.3844731977818854, "grad_norm": 2.171875, "learning_rate": 4.750595631758886e-06, "loss": 1.6678, "step": 1498 }, { "epoch": 1.3853974121996302, "grad_norm": 2.1875, "learning_rate": 4.7375899368413105e-06, "loss": 1.6911, "step": 1499 }, { "epoch": 1.3863216266173752, "grad_norm": 2.15625, "learning_rate": 4.724596540665843e-06, "loss": 1.6708, "step": 1500 }, { "epoch": 1.3872458410351203, "grad_norm": 2.125, "learning_rate": 4.7116154735992695e-06, "loss": 1.7159, "step": 1501 }, { "epoch": 1.388170055452865, "grad_norm": 2.15625, "learning_rate": 4.698646765979541e-06, "loss": 1.6999, "step": 1502 }, { "epoch": 1.38909426987061, "grad_norm": 2.171875, "learning_rate": 4.685690448115739e-06, "loss": 1.6688, "step": 1503 }, { "epoch": 1.390018484288355, "grad_norm": 2.109375, "learning_rate": 4.672746550287985e-06, "loss": 1.6871, "step": 1504 }, { "epoch": 1.3909426987060998, "grad_norm": 2.15625, "learning_rate": 4.6598151027473674e-06, "loss": 1.6373, "step": 1505 }, { "epoch": 1.3918669131238448, "grad_norm": 2.15625, "learning_rate": 4.6468961357158915e-06, "loss": 1.6941, "step": 1506 }, { "epoch": 1.3927911275415896, "grad_norm": 2.09375, "learning_rate": 4.63398967938638e-06, "loss": 1.5945, "step": 1507 }, { "epoch": 1.3937153419593344, "grad_norm": 2.1875, "learning_rate": 4.621095763922425e-06, "loss": 1.7059, "step": 1508 }, { "epoch": 1.3946395563770795, "grad_norm": 2.28125, "learning_rate": 4.608214419458306e-06, "loss": 1.6923, "step": 1509 }, { "epoch": 1.3955637707948245, "grad_norm": 2.140625, "learning_rate": 4.595345676098923e-06, "loss": 1.6638, "step": 1510 }, { "epoch": 1.3964879852125693, "grad_norm": 2.25, "learning_rate": 4.582489563919729e-06, "loss": 1.7736, "step": 1511 }, { "epoch": 1.3974121996303142, "grad_norm": 2.234375, "learning_rate": 4.5696461129666525e-06, "loss": 1.7227, "step": 1512 }, { "epoch": 1.3983364140480592, "grad_norm": 2.203125, "learning_rate": 4.5568153532560354e-06, "loss": 1.7075, "step": 1513 }, { "epoch": 1.399260628465804, "grad_norm": 2.21875, "learning_rate": 4.5439973147745534e-06, "loss": 1.664, "step": 1514 }, { "epoch": 1.400184842883549, "grad_norm": 2.1875, "learning_rate": 4.531192027479161e-06, "loss": 1.6294, "step": 1515 }, { "epoch": 1.4011090573012939, "grad_norm": 2.25, "learning_rate": 4.518399521297005e-06, "loss": 1.7111, "step": 1516 }, { "epoch": 1.4020332717190387, "grad_norm": 2.203125, "learning_rate": 4.505619826125364e-06, "loss": 1.6501, "step": 1517 }, { "epoch": 1.4029574861367837, "grad_norm": 2.203125, "learning_rate": 4.492852971831572e-06, "loss": 1.724, "step": 1518 }, { "epoch": 1.4038817005545288, "grad_norm": 2.140625, "learning_rate": 4.480098988252958e-06, "loss": 1.6937, "step": 1519 }, { "epoch": 1.4048059149722736, "grad_norm": 2.125, "learning_rate": 4.4673579051967665e-06, "loss": 1.6457, "step": 1520 }, { "epoch": 1.4057301293900184, "grad_norm": 2.109375, "learning_rate": 4.454629752440096e-06, "loss": 1.6571, "step": 1521 }, { "epoch": 1.4066543438077634, "grad_norm": 2.125, "learning_rate": 4.441914559729825e-06, "loss": 1.6457, "step": 1522 }, { "epoch": 1.4075785582255083, "grad_norm": 2.1875, "learning_rate": 4.429212356782541e-06, "loss": 1.627, "step": 1523 }, { "epoch": 1.4085027726432533, "grad_norm": 2.265625, "learning_rate": 4.41652317328447e-06, "loss": 1.7244, "step": 1524 }, { "epoch": 1.4094269870609981, "grad_norm": 2.140625, "learning_rate": 4.403847038891425e-06, "loss": 1.6988, "step": 1525 }, { "epoch": 1.410351201478743, "grad_norm": 2.046875, "learning_rate": 4.391183983228705e-06, "loss": 1.6255, "step": 1526 }, { "epoch": 1.411275415896488, "grad_norm": 2.1875, "learning_rate": 4.378534035891051e-06, "loss": 1.7229, "step": 1527 }, { "epoch": 1.412199630314233, "grad_norm": 2.125, "learning_rate": 4.365897226442573e-06, "loss": 1.6781, "step": 1528 }, { "epoch": 1.4131238447319778, "grad_norm": 2.125, "learning_rate": 4.353273584416657e-06, "loss": 1.6935, "step": 1529 }, { "epoch": 1.4140480591497226, "grad_norm": 2.078125, "learning_rate": 4.340663139315942e-06, "loss": 1.6428, "step": 1530 }, { "epoch": 1.4149722735674677, "grad_norm": 2.125, "learning_rate": 4.328065920612207e-06, "loss": 1.6272, "step": 1531 }, { "epoch": 1.4158964879852125, "grad_norm": 2.15625, "learning_rate": 4.315481957746325e-06, "loss": 1.6781, "step": 1532 }, { "epoch": 1.4168207024029575, "grad_norm": 2.109375, "learning_rate": 4.302911280128189e-06, "loss": 1.677, "step": 1533 }, { "epoch": 1.4177449168207024, "grad_norm": 2.15625, "learning_rate": 4.290353917136639e-06, "loss": 1.7141, "step": 1534 }, { "epoch": 1.4186691312384474, "grad_norm": 2.125, "learning_rate": 4.277809898119407e-06, "loss": 1.6917, "step": 1535 }, { "epoch": 1.4195933456561922, "grad_norm": 2.171875, "learning_rate": 4.265279252393035e-06, "loss": 1.6623, "step": 1536 }, { "epoch": 1.4205175600739373, "grad_norm": 2.21875, "learning_rate": 4.2527620092428e-06, "loss": 1.708, "step": 1537 }, { "epoch": 1.421441774491682, "grad_norm": 2.109375, "learning_rate": 4.240258197922668e-06, "loss": 1.6086, "step": 1538 }, { "epoch": 1.422365988909427, "grad_norm": 2.171875, "learning_rate": 4.227767847655205e-06, "loss": 1.7323, "step": 1539 }, { "epoch": 1.423290203327172, "grad_norm": 2.1875, "learning_rate": 4.215290987631531e-06, "loss": 1.7266, "step": 1540 }, { "epoch": 1.4242144177449167, "grad_norm": 2.171875, "learning_rate": 4.202827647011226e-06, "loss": 1.7032, "step": 1541 }, { "epoch": 1.4251386321626618, "grad_norm": 2.1875, "learning_rate": 4.190377854922275e-06, "loss": 1.7317, "step": 1542 }, { "epoch": 1.4260628465804066, "grad_norm": 2.1875, "learning_rate": 4.177941640461003e-06, "loss": 1.6222, "step": 1543 }, { "epoch": 1.4269870609981516, "grad_norm": 2.21875, "learning_rate": 4.165519032691998e-06, "loss": 1.7467, "step": 1544 }, { "epoch": 1.4279112754158965, "grad_norm": 2.171875, "learning_rate": 4.153110060648054e-06, "loss": 1.6809, "step": 1545 }, { "epoch": 1.4288354898336415, "grad_norm": 2.15625, "learning_rate": 4.140714753330091e-06, "loss": 1.6463, "step": 1546 }, { "epoch": 1.4297597042513863, "grad_norm": 2.125, "learning_rate": 4.1283331397070954e-06, "loss": 1.644, "step": 1547 }, { "epoch": 1.4306839186691311, "grad_norm": 2.140625, "learning_rate": 4.1159652487160505e-06, "loss": 1.6242, "step": 1548 }, { "epoch": 1.4316081330868762, "grad_norm": 2.171875, "learning_rate": 4.1036111092618725e-06, "loss": 1.6891, "step": 1549 }, { "epoch": 1.432532347504621, "grad_norm": 2.203125, "learning_rate": 4.0912707502173335e-06, "loss": 1.7121, "step": 1550 }, { "epoch": 1.433456561922366, "grad_norm": 2.125, "learning_rate": 4.078944200422999e-06, "loss": 1.6429, "step": 1551 }, { "epoch": 1.4343807763401109, "grad_norm": 2.234375, "learning_rate": 4.066631488687166e-06, "loss": 1.6839, "step": 1552 }, { "epoch": 1.435304990757856, "grad_norm": 2.1875, "learning_rate": 4.054332643785786e-06, "loss": 1.6877, "step": 1553 }, { "epoch": 1.4362292051756007, "grad_norm": 2.171875, "learning_rate": 4.042047694462405e-06, "loss": 1.6882, "step": 1554 }, { "epoch": 1.4371534195933457, "grad_norm": 2.15625, "learning_rate": 4.029776669428091e-06, "loss": 1.5973, "step": 1555 }, { "epoch": 1.4380776340110906, "grad_norm": 2.109375, "learning_rate": 4.017519597361375e-06, "loss": 1.7257, "step": 1556 }, { "epoch": 1.4390018484288354, "grad_norm": 2.1875, "learning_rate": 4.005276506908172e-06, "loss": 1.7546, "step": 1557 }, { "epoch": 1.4399260628465804, "grad_norm": 2.1875, "learning_rate": 3.993047426681723e-06, "loss": 1.702, "step": 1558 }, { "epoch": 1.4408502772643252, "grad_norm": 2.140625, "learning_rate": 3.980832385262532e-06, "loss": 1.6732, "step": 1559 }, { "epoch": 1.4417744916820703, "grad_norm": 2.21875, "learning_rate": 3.968631411198285e-06, "loss": 1.7638, "step": 1560 }, { "epoch": 1.442698706099815, "grad_norm": 2.21875, "learning_rate": 3.9564445330037934e-06, "loss": 1.7503, "step": 1561 }, { "epoch": 1.4436229205175601, "grad_norm": 2.171875, "learning_rate": 3.944271779160928e-06, "loss": 1.7149, "step": 1562 }, { "epoch": 1.444547134935305, "grad_norm": 2.171875, "learning_rate": 3.932113178118543e-06, "loss": 1.7417, "step": 1563 }, { "epoch": 1.44547134935305, "grad_norm": 2.15625, "learning_rate": 3.919968758292425e-06, "loss": 1.6055, "step": 1564 }, { "epoch": 1.4463955637707948, "grad_norm": 2.171875, "learning_rate": 3.907838548065211e-06, "loss": 1.7189, "step": 1565 }, { "epoch": 1.4473197781885396, "grad_norm": 2.140625, "learning_rate": 3.895722575786333e-06, "loss": 1.7194, "step": 1566 }, { "epoch": 1.4482439926062847, "grad_norm": 2.09375, "learning_rate": 3.883620869771943e-06, "loss": 1.6323, "step": 1567 }, { "epoch": 1.4491682070240297, "grad_norm": 2.203125, "learning_rate": 3.871533458304855e-06, "loss": 1.6635, "step": 1568 }, { "epoch": 1.4500924214417745, "grad_norm": 2.140625, "learning_rate": 3.859460369634479e-06, "loss": 1.6471, "step": 1569 }, { "epoch": 1.4510166358595193, "grad_norm": 2.109375, "learning_rate": 3.847401631976744e-06, "loss": 1.6401, "step": 1570 }, { "epoch": 1.4519408502772644, "grad_norm": 2.1875, "learning_rate": 3.835357273514044e-06, "loss": 1.6384, "step": 1571 }, { "epoch": 1.4528650646950092, "grad_norm": 2.125, "learning_rate": 3.823327322395161e-06, "loss": 1.666, "step": 1572 }, { "epoch": 1.4537892791127542, "grad_norm": 2.140625, "learning_rate": 3.8113118067352116e-06, "loss": 1.6375, "step": 1573 }, { "epoch": 1.454713493530499, "grad_norm": 2.171875, "learning_rate": 3.7993107546155783e-06, "loss": 1.7317, "step": 1574 }, { "epoch": 1.4556377079482439, "grad_norm": 2.140625, "learning_rate": 3.7873241940838347e-06, "loss": 1.7396, "step": 1575 }, { "epoch": 1.456561922365989, "grad_norm": 2.15625, "learning_rate": 3.775352153153687e-06, "loss": 1.7018, "step": 1576 }, { "epoch": 1.457486136783734, "grad_norm": 2.203125, "learning_rate": 3.763394659804912e-06, "loss": 1.6507, "step": 1577 }, { "epoch": 1.4584103512014788, "grad_norm": 2.171875, "learning_rate": 3.7514517419832784e-06, "loss": 1.6875, "step": 1578 }, { "epoch": 1.4593345656192236, "grad_norm": 2.171875, "learning_rate": 3.739523427600509e-06, "loss": 1.7956, "step": 1579 }, { "epoch": 1.4602587800369686, "grad_norm": 2.15625, "learning_rate": 3.727609744534173e-06, "loss": 1.7088, "step": 1580 }, { "epoch": 1.4611829944547134, "grad_norm": 2.109375, "learning_rate": 3.715710720627661e-06, "loss": 1.6138, "step": 1581 }, { "epoch": 1.4621072088724585, "grad_norm": 2.109375, "learning_rate": 3.703826383690099e-06, "loss": 1.637, "step": 1582 }, { "epoch": 1.4630314232902033, "grad_norm": 2.15625, "learning_rate": 3.691956761496285e-06, "loss": 1.5822, "step": 1583 }, { "epoch": 1.4639556377079481, "grad_norm": 2.140625, "learning_rate": 3.6801018817866375e-06, "loss": 1.7177, "step": 1584 }, { "epoch": 1.4648798521256932, "grad_norm": 2.15625, "learning_rate": 3.66826177226711e-06, "loss": 1.5937, "step": 1585 }, { "epoch": 1.4658040665434382, "grad_norm": 2.1875, "learning_rate": 3.656436460609142e-06, "loss": 1.6385, "step": 1586 }, { "epoch": 1.466728280961183, "grad_norm": 2.21875, "learning_rate": 3.644625974449586e-06, "loss": 1.7138, "step": 1587 }, { "epoch": 1.4676524953789278, "grad_norm": 2.234375, "learning_rate": 3.6328303413906474e-06, "loss": 1.714, "step": 1588 }, { "epoch": 1.4685767097966729, "grad_norm": 2.1875, "learning_rate": 3.62104958899982e-06, "loss": 1.7448, "step": 1589 }, { "epoch": 1.4695009242144177, "grad_norm": 2.203125, "learning_rate": 3.609283744809817e-06, "loss": 1.637, "step": 1590 }, { "epoch": 1.4704251386321627, "grad_norm": 2.15625, "learning_rate": 3.5975328363185123e-06, "loss": 1.6341, "step": 1591 }, { "epoch": 1.4713493530499075, "grad_norm": 2.125, "learning_rate": 3.5857968909888694e-06, "loss": 1.6803, "step": 1592 }, { "epoch": 1.4722735674676524, "grad_norm": 2.1875, "learning_rate": 3.574075936248892e-06, "loss": 1.7021, "step": 1593 }, { "epoch": 1.4731977818853974, "grad_norm": 2.1875, "learning_rate": 3.5623699994915363e-06, "loss": 1.6389, "step": 1594 }, { "epoch": 1.4741219963031424, "grad_norm": 2.234375, "learning_rate": 3.5506791080746695e-06, "loss": 1.7286, "step": 1595 }, { "epoch": 1.4750462107208873, "grad_norm": 2.171875, "learning_rate": 3.5390032893209902e-06, "loss": 1.6244, "step": 1596 }, { "epoch": 1.475970425138632, "grad_norm": 2.140625, "learning_rate": 3.527342570517975e-06, "loss": 1.6732, "step": 1597 }, { "epoch": 1.4768946395563771, "grad_norm": 2.234375, "learning_rate": 3.5156969789178097e-06, "loss": 1.7415, "step": 1598 }, { "epoch": 1.477818853974122, "grad_norm": 2.171875, "learning_rate": 3.5040665417373232e-06, "loss": 1.668, "step": 1599 }, { "epoch": 1.478743068391867, "grad_norm": 2.1875, "learning_rate": 3.492451286157932e-06, "loss": 1.766, "step": 1600 }, { "epoch": 1.4796672828096118, "grad_norm": 2.1875, "learning_rate": 3.480851239325569e-06, "loss": 1.6679, "step": 1601 }, { "epoch": 1.4805914972273566, "grad_norm": 2.1875, "learning_rate": 3.4692664283506193e-06, "loss": 1.7481, "step": 1602 }, { "epoch": 1.4815157116451017, "grad_norm": 2.140625, "learning_rate": 3.4576968803078724e-06, "loss": 1.634, "step": 1603 }, { "epoch": 1.4824399260628467, "grad_norm": 2.3125, "learning_rate": 3.4461426222364337e-06, "loss": 1.7525, "step": 1604 }, { "epoch": 1.4833641404805915, "grad_norm": 2.125, "learning_rate": 3.434603681139681e-06, "loss": 1.684, "step": 1605 }, { "epoch": 1.4842883548983363, "grad_norm": 2.125, "learning_rate": 3.4230800839851965e-06, "loss": 1.6552, "step": 1606 }, { "epoch": 1.4852125693160814, "grad_norm": 2.203125, "learning_rate": 3.41157185770469e-06, "loss": 1.7101, "step": 1607 }, { "epoch": 1.4861367837338262, "grad_norm": 2.265625, "learning_rate": 3.400079029193966e-06, "loss": 1.6776, "step": 1608 }, { "epoch": 1.4870609981515712, "grad_norm": 2.234375, "learning_rate": 3.388601625312833e-06, "loss": 1.6726, "step": 1609 }, { "epoch": 1.487985212569316, "grad_norm": 2.140625, "learning_rate": 3.3771396728850505e-06, "loss": 1.7325, "step": 1610 }, { "epoch": 1.488909426987061, "grad_norm": 2.125, "learning_rate": 3.3656931986982676e-06, "loss": 1.6178, "step": 1611 }, { "epoch": 1.489833641404806, "grad_norm": 2.15625, "learning_rate": 3.3542622295039595e-06, "loss": 1.7695, "step": 1612 }, { "epoch": 1.490757855822551, "grad_norm": 2.1875, "learning_rate": 3.342846792017369e-06, "loss": 1.7747, "step": 1613 }, { "epoch": 1.4916820702402958, "grad_norm": 2.203125, "learning_rate": 3.3314469129174365e-06, "loss": 1.7212, "step": 1614 }, { "epoch": 1.4926062846580406, "grad_norm": 2.15625, "learning_rate": 3.3200626188467343e-06, "loss": 1.6473, "step": 1615 }, { "epoch": 1.4935304990757856, "grad_norm": 2.203125, "learning_rate": 3.308693936411421e-06, "loss": 1.6612, "step": 1616 }, { "epoch": 1.4944547134935304, "grad_norm": 2.15625, "learning_rate": 3.297340892181161e-06, "loss": 1.6447, "step": 1617 }, { "epoch": 1.4953789279112755, "grad_norm": 2.3125, "learning_rate": 3.2860035126890833e-06, "loss": 1.6532, "step": 1618 }, { "epoch": 1.4963031423290203, "grad_norm": 2.125, "learning_rate": 3.2746818244316957e-06, "loss": 1.742, "step": 1619 }, { "epoch": 1.4972273567467653, "grad_norm": 2.203125, "learning_rate": 3.2633758538688355e-06, "loss": 1.7345, "step": 1620 }, { "epoch": 1.4981515711645101, "grad_norm": 2.234375, "learning_rate": 3.2520856274236103e-06, "loss": 1.7484, "step": 1621 }, { "epoch": 1.4990757855822552, "grad_norm": 2.078125, "learning_rate": 3.240811171482329e-06, "loss": 1.6099, "step": 1622 }, { "epoch": 1.5, "grad_norm": 2.203125, "learning_rate": 3.229552512394445e-06, "loss": 1.6716, "step": 1623 }, { "epoch": 1.5009242144177448, "grad_norm": 2.1875, "learning_rate": 3.218309676472492e-06, "loss": 1.7036, "step": 1624 }, { "epoch": 1.5018484288354899, "grad_norm": 2.171875, "learning_rate": 3.207082689992025e-06, "loss": 1.787, "step": 1625 }, { "epoch": 1.502772643253235, "grad_norm": 2.265625, "learning_rate": 3.195871579191552e-06, "loss": 1.7575, "step": 1626 }, { "epoch": 1.5036968576709797, "grad_norm": 2.171875, "learning_rate": 3.184676370272488e-06, "loss": 1.7195, "step": 1627 }, { "epoch": 1.5046210720887245, "grad_norm": 2.140625, "learning_rate": 3.173497089399078e-06, "loss": 1.6199, "step": 1628 }, { "epoch": 1.5055452865064693, "grad_norm": 2.171875, "learning_rate": 3.1623337626983398e-06, "loss": 1.7029, "step": 1629 }, { "epoch": 1.5064695009242144, "grad_norm": 2.140625, "learning_rate": 3.151186416260006e-06, "loss": 1.5596, "step": 1630 }, { "epoch": 1.5073937153419594, "grad_norm": 2.125, "learning_rate": 3.140055076136466e-06, "loss": 1.646, "step": 1631 }, { "epoch": 1.5083179297597042, "grad_norm": 2.203125, "learning_rate": 3.1289397683426935e-06, "loss": 1.6982, "step": 1632 }, { "epoch": 1.509242144177449, "grad_norm": 2.109375, "learning_rate": 3.117840518856201e-06, "loss": 1.6321, "step": 1633 }, { "epoch": 1.510166358595194, "grad_norm": 2.1875, "learning_rate": 3.106757353616966e-06, "loss": 1.7068, "step": 1634 }, { "epoch": 1.5110905730129391, "grad_norm": 2.171875, "learning_rate": 3.095690298527377e-06, "loss": 1.6792, "step": 1635 }, { "epoch": 1.512014787430684, "grad_norm": 2.09375, "learning_rate": 3.084639379452169e-06, "loss": 1.5915, "step": 1636 }, { "epoch": 1.5129390018484288, "grad_norm": 2.234375, "learning_rate": 3.073604622218376e-06, "loss": 1.7646, "step": 1637 }, { "epoch": 1.5138632162661736, "grad_norm": 2.171875, "learning_rate": 3.062586052615246e-06, "loss": 1.6813, "step": 1638 }, { "epoch": 1.5147874306839186, "grad_norm": 2.171875, "learning_rate": 3.0515836963942056e-06, "loss": 1.6172, "step": 1639 }, { "epoch": 1.5157116451016637, "grad_norm": 2.1875, "learning_rate": 3.040597579268784e-06, "loss": 1.7103, "step": 1640 }, { "epoch": 1.5166358595194085, "grad_norm": 2.171875, "learning_rate": 3.029627726914559e-06, "loss": 1.7618, "step": 1641 }, { "epoch": 1.5175600739371533, "grad_norm": 2.203125, "learning_rate": 3.0186741649690966e-06, "loss": 1.781, "step": 1642 }, { "epoch": 1.5184842883548983, "grad_norm": 2.09375, "learning_rate": 3.0077369190318906e-06, "loss": 1.5187, "step": 1643 }, { "epoch": 1.5194085027726434, "grad_norm": 2.109375, "learning_rate": 2.9968160146643022e-06, "loss": 1.6056, "step": 1644 }, { "epoch": 1.5203327171903882, "grad_norm": 2.1875, "learning_rate": 2.9859114773895027e-06, "loss": 1.684, "step": 1645 }, { "epoch": 1.521256931608133, "grad_norm": 2.140625, "learning_rate": 2.9750233326924073e-06, "loss": 1.668, "step": 1646 }, { "epoch": 1.5221811460258778, "grad_norm": 2.140625, "learning_rate": 2.9641516060196286e-06, "loss": 1.6224, "step": 1647 }, { "epoch": 1.5231053604436229, "grad_norm": 2.15625, "learning_rate": 2.953296322779401e-06, "loss": 1.7032, "step": 1648 }, { "epoch": 1.524029574861368, "grad_norm": 2.140625, "learning_rate": 2.942457508341536e-06, "loss": 1.6487, "step": 1649 }, { "epoch": 1.5249537892791127, "grad_norm": 2.109375, "learning_rate": 2.931635188037345e-06, "loss": 1.6681, "step": 1650 }, { "epoch": 1.5258780036968576, "grad_norm": 2.078125, "learning_rate": 2.920829387159596e-06, "loss": 1.6449, "step": 1651 }, { "epoch": 1.5268022181146026, "grad_norm": 2.125, "learning_rate": 2.9100401309624604e-06, "loss": 1.6665, "step": 1652 }, { "epoch": 1.5277264325323476, "grad_norm": 2.203125, "learning_rate": 2.89926744466143e-06, "loss": 1.7807, "step": 1653 }, { "epoch": 1.5286506469500925, "grad_norm": 2.09375, "learning_rate": 2.8885113534332742e-06, "loss": 1.6116, "step": 1654 }, { "epoch": 1.5295748613678373, "grad_norm": 2.09375, "learning_rate": 2.8777718824159818e-06, "loss": 1.607, "step": 1655 }, { "epoch": 1.5304990757855823, "grad_norm": 2.15625, "learning_rate": 2.8670490567086895e-06, "loss": 1.7245, "step": 1656 }, { "epoch": 1.5314232902033271, "grad_norm": 2.15625, "learning_rate": 2.8563429013716515e-06, "loss": 1.667, "step": 1657 }, { "epoch": 1.5323475046210722, "grad_norm": 2.125, "learning_rate": 2.845653441426138e-06, "loss": 1.6678, "step": 1658 }, { "epoch": 1.533271719038817, "grad_norm": 2.1875, "learning_rate": 2.834980701854417e-06, "loss": 1.7711, "step": 1659 }, { "epoch": 1.5341959334565618, "grad_norm": 2.15625, "learning_rate": 2.8243247075996694e-06, "loss": 1.6594, "step": 1660 }, { "epoch": 1.5351201478743068, "grad_norm": 2.1875, "learning_rate": 2.8136854835659512e-06, "loss": 1.738, "step": 1661 }, { "epoch": 1.5360443622920519, "grad_norm": 2.09375, "learning_rate": 2.803063054618118e-06, "loss": 1.6341, "step": 1662 }, { "epoch": 1.5369685767097967, "grad_norm": 2.15625, "learning_rate": 2.792457445581773e-06, "loss": 1.6879, "step": 1663 }, { "epoch": 1.5378927911275415, "grad_norm": 2.234375, "learning_rate": 2.781868681243214e-06, "loss": 1.715, "step": 1664 }, { "epoch": 1.5388170055452866, "grad_norm": 2.140625, "learning_rate": 2.771296786349367e-06, "loss": 1.6355, "step": 1665 }, { "epoch": 1.5397412199630314, "grad_norm": 2.15625, "learning_rate": 2.7607417856077334e-06, "loss": 1.6677, "step": 1666 }, { "epoch": 1.5406654343807764, "grad_norm": 2.1875, "learning_rate": 2.750203703686335e-06, "loss": 1.7833, "step": 1667 }, { "epoch": 1.5415896487985212, "grad_norm": 2.125, "learning_rate": 2.7396825652136484e-06, "loss": 1.6678, "step": 1668 }, { "epoch": 1.542513863216266, "grad_norm": 2.125, "learning_rate": 2.7291783947785544e-06, "loss": 1.7076, "step": 1669 }, { "epoch": 1.543438077634011, "grad_norm": 2.203125, "learning_rate": 2.718691216930273e-06, "loss": 1.5984, "step": 1670 }, { "epoch": 1.5443622920517561, "grad_norm": 2.203125, "learning_rate": 2.7082210561783207e-06, "loss": 1.734, "step": 1671 }, { "epoch": 1.545286506469501, "grad_norm": 2.125, "learning_rate": 2.697767936992436e-06, "loss": 1.6952, "step": 1672 }, { "epoch": 1.5462107208872458, "grad_norm": 2.140625, "learning_rate": 2.687331883802531e-06, "loss": 1.6976, "step": 1673 }, { "epoch": 1.5471349353049908, "grad_norm": 2.15625, "learning_rate": 2.6769129209986324e-06, "loss": 1.6475, "step": 1674 }, { "epoch": 1.5480591497227358, "grad_norm": 2.15625, "learning_rate": 2.6665110729308263e-06, "loss": 1.6831, "step": 1675 }, { "epoch": 1.5489833641404807, "grad_norm": 2.140625, "learning_rate": 2.6561263639091995e-06, "loss": 1.6075, "step": 1676 }, { "epoch": 1.5499075785582255, "grad_norm": 2.09375, "learning_rate": 2.645758818203785e-06, "loss": 1.5824, "step": 1677 }, { "epoch": 1.5508317929759703, "grad_norm": 2.140625, "learning_rate": 2.635408460044502e-06, "loss": 1.5836, "step": 1678 }, { "epoch": 1.5517560073937153, "grad_norm": 2.109375, "learning_rate": 2.625075313621098e-06, "loss": 1.6713, "step": 1679 }, { "epoch": 1.5526802218114604, "grad_norm": 2.1875, "learning_rate": 2.614759403083098e-06, "loss": 1.7297, "step": 1680 }, { "epoch": 1.5536044362292052, "grad_norm": 2.109375, "learning_rate": 2.6044607525397503e-06, "loss": 1.5955, "step": 1681 }, { "epoch": 1.55452865064695, "grad_norm": 2.140625, "learning_rate": 2.5941793860599564e-06, "loss": 1.7423, "step": 1682 }, { "epoch": 1.555452865064695, "grad_norm": 2.1875, "learning_rate": 2.5839153276722263e-06, "loss": 1.6421, "step": 1683 }, { "epoch": 1.55637707948244, "grad_norm": 2.1875, "learning_rate": 2.5736686013646226e-06, "loss": 1.6339, "step": 1684 }, { "epoch": 1.557301293900185, "grad_norm": 2.125, "learning_rate": 2.563439231084691e-06, "loss": 1.6867, "step": 1685 }, { "epoch": 1.5582255083179297, "grad_norm": 2.390625, "learning_rate": 2.55322724073943e-06, "loss": 1.7123, "step": 1686 }, { "epoch": 1.5591497227356745, "grad_norm": 2.21875, "learning_rate": 2.543032654195209e-06, "loss": 1.7549, "step": 1687 }, { "epoch": 1.5600739371534196, "grad_norm": 2.203125, "learning_rate": 2.532855495277724e-06, "loss": 1.6939, "step": 1688 }, { "epoch": 1.5609981515711646, "grad_norm": 2.15625, "learning_rate": 2.522695787771944e-06, "loss": 1.7357, "step": 1689 }, { "epoch": 1.5619223659889094, "grad_norm": 2.125, "learning_rate": 2.512553555422048e-06, "loss": 1.7114, "step": 1690 }, { "epoch": 1.5628465804066543, "grad_norm": 2.171875, "learning_rate": 2.502428821931382e-06, "loss": 1.7505, "step": 1691 }, { "epoch": 1.5637707948243993, "grad_norm": 2.15625, "learning_rate": 2.4923216109623917e-06, "loss": 1.6676, "step": 1692 }, { "epoch": 1.5646950092421443, "grad_norm": 2.15625, "learning_rate": 2.482231946136563e-06, "loss": 1.7044, "step": 1693 }, { "epoch": 1.5656192236598891, "grad_norm": 2.203125, "learning_rate": 2.4721598510343857e-06, "loss": 1.6285, "step": 1694 }, { "epoch": 1.566543438077634, "grad_norm": 2.203125, "learning_rate": 2.4621053491952796e-06, "loss": 1.7315, "step": 1695 }, { "epoch": 1.5674676524953788, "grad_norm": 2.09375, "learning_rate": 2.4520684641175573e-06, "loss": 1.6311, "step": 1696 }, { "epoch": 1.5683918669131238, "grad_norm": 2.171875, "learning_rate": 2.442049219258351e-06, "loss": 1.6828, "step": 1697 }, { "epoch": 1.5693160813308689, "grad_norm": 2.171875, "learning_rate": 2.432047638033568e-06, "loss": 1.7121, "step": 1698 }, { "epoch": 1.5702402957486137, "grad_norm": 2.203125, "learning_rate": 2.422063743817832e-06, "loss": 1.6962, "step": 1699 }, { "epoch": 1.5711645101663585, "grad_norm": 2.125, "learning_rate": 2.412097559944442e-06, "loss": 1.7056, "step": 1700 }, { "epoch": 1.5720887245841035, "grad_norm": 2.140625, "learning_rate": 2.4021491097052862e-06, "loss": 1.673, "step": 1701 }, { "epoch": 1.5730129390018486, "grad_norm": 2.1875, "learning_rate": 2.3922184163508254e-06, "loss": 1.5784, "step": 1702 }, { "epoch": 1.5739371534195934, "grad_norm": 2.125, "learning_rate": 2.382305503090011e-06, "loss": 1.5782, "step": 1703 }, { "epoch": 1.5748613678373382, "grad_norm": 2.171875, "learning_rate": 2.372410393090243e-06, "loss": 1.6696, "step": 1704 }, { "epoch": 1.575785582255083, "grad_norm": 2.1875, "learning_rate": 2.362533109477321e-06, "loss": 1.7108, "step": 1705 }, { "epoch": 1.576709796672828, "grad_norm": 2.203125, "learning_rate": 2.3526736753353694e-06, "loss": 1.7086, "step": 1706 }, { "epoch": 1.577634011090573, "grad_norm": 2.15625, "learning_rate": 2.3428321137068077e-06, "loss": 1.6954, "step": 1707 }, { "epoch": 1.578558225508318, "grad_norm": 2.203125, "learning_rate": 2.33300844759228e-06, "loss": 1.701, "step": 1708 }, { "epoch": 1.5794824399260627, "grad_norm": 2.125, "learning_rate": 2.3232026999506062e-06, "loss": 1.6875, "step": 1709 }, { "epoch": 1.5804066543438078, "grad_norm": 2.15625, "learning_rate": 2.3134148936987343e-06, "loss": 1.769, "step": 1710 }, { "epoch": 1.5813308687615528, "grad_norm": 2.171875, "learning_rate": 2.303645051711677e-06, "loss": 1.6297, "step": 1711 }, { "epoch": 1.5822550831792976, "grad_norm": 2.15625, "learning_rate": 2.2938931968224644e-06, "loss": 1.6628, "step": 1712 }, { "epoch": 1.5831792975970425, "grad_norm": 2.1875, "learning_rate": 2.2841593518220884e-06, "loss": 1.7038, "step": 1713 }, { "epoch": 1.5841035120147873, "grad_norm": 2.1875, "learning_rate": 2.27444353945945e-06, "loss": 1.7573, "step": 1714 }, { "epoch": 1.5850277264325323, "grad_norm": 2.265625, "learning_rate": 2.26474578244131e-06, "loss": 1.6885, "step": 1715 }, { "epoch": 1.5859519408502774, "grad_norm": 2.140625, "learning_rate": 2.2550661034322284e-06, "loss": 1.6782, "step": 1716 }, { "epoch": 1.5868761552680222, "grad_norm": 2.140625, "learning_rate": 2.2454045250545154e-06, "loss": 1.7038, "step": 1717 }, { "epoch": 1.587800369685767, "grad_norm": 2.125, "learning_rate": 2.2357610698881784e-06, "loss": 1.614, "step": 1718 }, { "epoch": 1.588724584103512, "grad_norm": 2.09375, "learning_rate": 2.22613576047087e-06, "loss": 1.6649, "step": 1719 }, { "epoch": 1.589648798521257, "grad_norm": 2.140625, "learning_rate": 2.216528619297834e-06, "loss": 1.6781, "step": 1720 }, { "epoch": 1.5905730129390019, "grad_norm": 2.171875, "learning_rate": 2.2069396688218535e-06, "loss": 1.7225, "step": 1721 }, { "epoch": 1.5914972273567467, "grad_norm": 2.1875, "learning_rate": 2.1973689314531986e-06, "loss": 1.7373, "step": 1722 }, { "epoch": 1.5924214417744915, "grad_norm": 2.15625, "learning_rate": 2.187816429559574e-06, "loss": 1.6918, "step": 1723 }, { "epoch": 1.5933456561922366, "grad_norm": 2.125, "learning_rate": 2.178282185466061e-06, "loss": 1.6775, "step": 1724 }, { "epoch": 1.5942698706099816, "grad_norm": 2.046875, "learning_rate": 2.168766221455082e-06, "loss": 1.5588, "step": 1725 }, { "epoch": 1.5951940850277264, "grad_norm": 2.140625, "learning_rate": 2.1592685597663287e-06, "loss": 1.7085, "step": 1726 }, { "epoch": 1.5961182994454712, "grad_norm": 2.15625, "learning_rate": 2.1497892225967222e-06, "loss": 1.65, "step": 1727 }, { "epoch": 1.5970425138632163, "grad_norm": 2.3125, "learning_rate": 2.1403282321003493e-06, "loss": 1.7147, "step": 1728 }, { "epoch": 1.5979667282809613, "grad_norm": 2.09375, "learning_rate": 2.130885610388428e-06, "loss": 1.6512, "step": 1729 }, { "epoch": 1.5988909426987061, "grad_norm": 2.203125, "learning_rate": 2.121461379529247e-06, "loss": 1.7235, "step": 1730 }, { "epoch": 1.599815157116451, "grad_norm": 2.140625, "learning_rate": 2.1120555615481096e-06, "loss": 1.6793, "step": 1731 }, { "epoch": 1.600739371534196, "grad_norm": 2.109375, "learning_rate": 2.1026681784272872e-06, "loss": 1.6066, "step": 1732 }, { "epoch": 1.6016635859519408, "grad_norm": 2.203125, "learning_rate": 2.093299252105969e-06, "loss": 1.7119, "step": 1733 }, { "epoch": 1.6025878003696858, "grad_norm": 2.15625, "learning_rate": 2.083948804480204e-06, "loss": 1.7392, "step": 1734 }, { "epoch": 1.6035120147874307, "grad_norm": 2.171875, "learning_rate": 2.0746168574028667e-06, "loss": 1.6103, "step": 1735 }, { "epoch": 1.6044362292051755, "grad_norm": 2.15625, "learning_rate": 2.0653034326835786e-06, "loss": 1.6784, "step": 1736 }, { "epoch": 1.6053604436229205, "grad_norm": 2.140625, "learning_rate": 2.056008552088682e-06, "loss": 1.6799, "step": 1737 }, { "epoch": 1.6062846580406656, "grad_norm": 2.140625, "learning_rate": 2.0467322373411757e-06, "loss": 1.7153, "step": 1738 }, { "epoch": 1.6072088724584104, "grad_norm": 2.140625, "learning_rate": 2.037474510120676e-06, "loss": 1.6437, "step": 1739 }, { "epoch": 1.6081330868761552, "grad_norm": 2.09375, "learning_rate": 2.0282353920633513e-06, "loss": 1.5968, "step": 1740 }, { "epoch": 1.6090573012939002, "grad_norm": 2.109375, "learning_rate": 2.019014904761879e-06, "loss": 1.6299, "step": 1741 }, { "epoch": 1.609981515711645, "grad_norm": 2.140625, "learning_rate": 2.009813069765396e-06, "loss": 1.6786, "step": 1742 }, { "epoch": 1.61090573012939, "grad_norm": 2.234375, "learning_rate": 2.000629908579449e-06, "loss": 1.668, "step": 1743 }, { "epoch": 1.611829944547135, "grad_norm": 2.125, "learning_rate": 1.9914654426659374e-06, "loss": 1.6618, "step": 1744 }, { "epoch": 1.6127541589648797, "grad_norm": 2.140625, "learning_rate": 1.9823196934430743e-06, "loss": 1.5754, "step": 1745 }, { "epoch": 1.6136783733826248, "grad_norm": 2.125, "learning_rate": 1.9731926822853255e-06, "loss": 1.6699, "step": 1746 }, { "epoch": 1.6146025878003698, "grad_norm": 2.109375, "learning_rate": 1.9640844305233643e-06, "loss": 1.6882, "step": 1747 }, { "epoch": 1.6155268022181146, "grad_norm": 2.125, "learning_rate": 1.954994959444021e-06, "loss": 1.6647, "step": 1748 }, { "epoch": 1.6164510166358594, "grad_norm": 2.140625, "learning_rate": 1.945924290290242e-06, "loss": 1.6628, "step": 1749 }, { "epoch": 1.6173752310536045, "grad_norm": 2.265625, "learning_rate": 1.936872444261022e-06, "loss": 1.7196, "step": 1750 }, { "epoch": 1.6182994454713495, "grad_norm": 2.15625, "learning_rate": 1.9278394425113666e-06, "loss": 1.6188, "step": 1751 }, { "epoch": 1.6192236598890943, "grad_norm": 2.125, "learning_rate": 1.9188253061522434e-06, "loss": 1.6413, "step": 1752 }, { "epoch": 1.6201478743068392, "grad_norm": 2.140625, "learning_rate": 1.9098300562505266e-06, "loss": 1.6915, "step": 1753 }, { "epoch": 1.621072088724584, "grad_norm": 2.125, "learning_rate": 1.9008537138289528e-06, "loss": 1.6813, "step": 1754 }, { "epoch": 1.621996303142329, "grad_norm": 2.25, "learning_rate": 1.8918962998660705e-06, "loss": 1.5854, "step": 1755 }, { "epoch": 1.622920517560074, "grad_norm": 2.234375, "learning_rate": 1.8829578352961886e-06, "loss": 1.6958, "step": 1756 }, { "epoch": 1.6238447319778189, "grad_norm": 2.203125, "learning_rate": 1.874038341009331e-06, "loss": 1.7275, "step": 1757 }, { "epoch": 1.6247689463955637, "grad_norm": 2.15625, "learning_rate": 1.8651378378511841e-06, "loss": 1.6768, "step": 1758 }, { "epoch": 1.6256931608133087, "grad_norm": 2.140625, "learning_rate": 1.8562563466230577e-06, "loss": 1.6385, "step": 1759 }, { "epoch": 1.6266173752310538, "grad_norm": 2.125, "learning_rate": 1.84739388808182e-06, "loss": 1.6735, "step": 1760 }, { "epoch": 1.6275415896487986, "grad_norm": 2.0625, "learning_rate": 1.8385504829398614e-06, "loss": 1.603, "step": 1761 }, { "epoch": 1.6284658040665434, "grad_norm": 2.140625, "learning_rate": 1.8297261518650454e-06, "loss": 1.6916, "step": 1762 }, { "epoch": 1.6293900184842882, "grad_norm": 2.140625, "learning_rate": 1.8209209154806484e-06, "loss": 1.7388, "step": 1763 }, { "epoch": 1.6303142329020333, "grad_norm": 2.078125, "learning_rate": 1.8121347943653333e-06, "loss": 1.5955, "step": 1764 }, { "epoch": 1.6312384473197783, "grad_norm": 2.1875, "learning_rate": 1.8033678090530814e-06, "loss": 1.7096, "step": 1765 }, { "epoch": 1.6321626617375231, "grad_norm": 2.15625, "learning_rate": 1.7946199800331533e-06, "loss": 1.701, "step": 1766 }, { "epoch": 1.633086876155268, "grad_norm": 2.15625, "learning_rate": 1.78589132775004e-06, "loss": 1.7094, "step": 1767 }, { "epoch": 1.634011090573013, "grad_norm": 2.109375, "learning_rate": 1.7771818726034106e-06, "loss": 1.6379, "step": 1768 }, { "epoch": 1.634935304990758, "grad_norm": 2.125, "learning_rate": 1.7684916349480796e-06, "loss": 1.6712, "step": 1769 }, { "epoch": 1.6358595194085028, "grad_norm": 2.109375, "learning_rate": 1.7598206350939384e-06, "loss": 1.7486, "step": 1770 }, { "epoch": 1.6367837338262476, "grad_norm": 2.21875, "learning_rate": 1.7511688933059178e-06, "loss": 1.581, "step": 1771 }, { "epoch": 1.6377079482439925, "grad_norm": 2.21875, "learning_rate": 1.7425364298039426e-06, "loss": 1.6845, "step": 1772 }, { "epoch": 1.6386321626617375, "grad_norm": 2.109375, "learning_rate": 1.733923264762889e-06, "loss": 1.6657, "step": 1773 }, { "epoch": 1.6395563770794825, "grad_norm": 2.140625, "learning_rate": 1.7253294183125223e-06, "loss": 1.6386, "step": 1774 }, { "epoch": 1.6404805914972274, "grad_norm": 2.171875, "learning_rate": 1.7167549105374614e-06, "loss": 1.6283, "step": 1775 }, { "epoch": 1.6414048059149722, "grad_norm": 2.140625, "learning_rate": 1.7081997614771306e-06, "loss": 1.6964, "step": 1776 }, { "epoch": 1.6423290203327172, "grad_norm": 2.140625, "learning_rate": 1.6996639911257051e-06, "loss": 1.6741, "step": 1777 }, { "epoch": 1.6432532347504623, "grad_norm": 2.109375, "learning_rate": 1.691147619432083e-06, "loss": 1.6756, "step": 1778 }, { "epoch": 1.644177449168207, "grad_norm": 2.125, "learning_rate": 1.6826506662998098e-06, "loss": 1.7271, "step": 1779 }, { "epoch": 1.645101663585952, "grad_norm": 2.15625, "learning_rate": 1.6741731515870596e-06, "loss": 1.6986, "step": 1780 }, { "epoch": 1.6460258780036967, "grad_norm": 2.21875, "learning_rate": 1.6657150951065716e-06, "loss": 1.7367, "step": 1781 }, { "epoch": 1.6469500924214417, "grad_norm": 2.171875, "learning_rate": 1.65727651662561e-06, "loss": 1.6797, "step": 1782 }, { "epoch": 1.6478743068391868, "grad_norm": 2.09375, "learning_rate": 1.6488574358659214e-06, "loss": 1.6388, "step": 1783 }, { "epoch": 1.6487985212569316, "grad_norm": 2.15625, "learning_rate": 1.6404578725036802e-06, "loss": 1.6462, "step": 1784 }, { "epoch": 1.6497227356746764, "grad_norm": 2.171875, "learning_rate": 1.6320778461694465e-06, "loss": 1.6633, "step": 1785 }, { "epoch": 1.6506469500924215, "grad_norm": 2.140625, "learning_rate": 1.623717376448123e-06, "loss": 1.6329, "step": 1786 }, { "epoch": 1.6515711645101665, "grad_norm": 2.171875, "learning_rate": 1.6153764828789043e-06, "loss": 1.6788, "step": 1787 }, { "epoch": 1.6524953789279113, "grad_norm": 2.125, "learning_rate": 1.6070551849552362e-06, "loss": 1.7128, "step": 1788 }, { "epoch": 1.6534195933456561, "grad_norm": 2.171875, "learning_rate": 1.5987535021247668e-06, "loss": 1.6424, "step": 1789 }, { "epoch": 1.654343807763401, "grad_norm": 2.109375, "learning_rate": 1.5904714537892996e-06, "loss": 1.6429, "step": 1790 }, { "epoch": 1.655268022181146, "grad_norm": 2.140625, "learning_rate": 1.5822090593047534e-06, "loss": 1.6624, "step": 1791 }, { "epoch": 1.656192236598891, "grad_norm": 2.140625, "learning_rate": 1.5739663379811122e-06, "loss": 1.6841, "step": 1792 }, { "epoch": 1.6571164510166358, "grad_norm": 2.140625, "learning_rate": 1.5657433090823847e-06, "loss": 1.6478, "step": 1793 }, { "epoch": 1.6580406654343807, "grad_norm": 2.15625, "learning_rate": 1.5575399918265543e-06, "loss": 1.7295, "step": 1794 }, { "epoch": 1.6589648798521257, "grad_norm": 2.0625, "learning_rate": 1.5493564053855381e-06, "loss": 1.648, "step": 1795 }, { "epoch": 1.6598890942698707, "grad_norm": 2.109375, "learning_rate": 1.541192568885136e-06, "loss": 1.5844, "step": 1796 }, { "epoch": 1.6608133086876156, "grad_norm": 2.15625, "learning_rate": 1.5330485014049968e-06, "loss": 1.6718, "step": 1797 }, { "epoch": 1.6617375231053604, "grad_norm": 2.1875, "learning_rate": 1.524924221978563e-06, "loss": 1.7539, "step": 1798 }, { "epoch": 1.6626617375231052, "grad_norm": 2.203125, "learning_rate": 1.5168197495930315e-06, "loss": 1.6613, "step": 1799 }, { "epoch": 1.6635859519408502, "grad_norm": 2.1875, "learning_rate": 1.5087351031893083e-06, "loss": 1.6992, "step": 1800 }, { "epoch": 1.6645101663585953, "grad_norm": 2.125, "learning_rate": 1.5006703016619662e-06, "loss": 1.6668, "step": 1801 }, { "epoch": 1.66543438077634, "grad_norm": 2.1875, "learning_rate": 1.4926253638591926e-06, "loss": 1.7006, "step": 1802 }, { "epoch": 1.666358595194085, "grad_norm": 2.15625, "learning_rate": 1.484600308582762e-06, "loss": 1.7117, "step": 1803 }, { "epoch": 1.66728280961183, "grad_norm": 2.140625, "learning_rate": 1.4765951545879732e-06, "loss": 1.6933, "step": 1804 }, { "epoch": 1.668207024029575, "grad_norm": 2.171875, "learning_rate": 1.468609920583617e-06, "loss": 1.6115, "step": 1805 }, { "epoch": 1.6691312384473198, "grad_norm": 2.15625, "learning_rate": 1.460644625231925e-06, "loss": 1.7159, "step": 1806 }, { "epoch": 1.6700554528650646, "grad_norm": 2.125, "learning_rate": 1.4526992871485346e-06, "loss": 1.6245, "step": 1807 }, { "epoch": 1.6709796672828097, "grad_norm": 2.0625, "learning_rate": 1.4447739249024429e-06, "loss": 1.7, "step": 1808 }, { "epoch": 1.6719038817005545, "grad_norm": 2.15625, "learning_rate": 1.436868557015959e-06, "loss": 1.6692, "step": 1809 }, { "epoch": 1.6728280961182995, "grad_norm": 2.171875, "learning_rate": 1.428983201964662e-06, "loss": 1.6697, "step": 1810 }, { "epoch": 1.6737523105360443, "grad_norm": 2.171875, "learning_rate": 1.4211178781773582e-06, "loss": 1.7418, "step": 1811 }, { "epoch": 1.6746765249537892, "grad_norm": 2.203125, "learning_rate": 1.4132726040360468e-06, "loss": 1.6979, "step": 1812 }, { "epoch": 1.6756007393715342, "grad_norm": 2.28125, "learning_rate": 1.4054473978758632e-06, "loss": 1.6794, "step": 1813 }, { "epoch": 1.6765249537892792, "grad_norm": 2.15625, "learning_rate": 1.3976422779850385e-06, "loss": 1.7166, "step": 1814 }, { "epoch": 1.677449168207024, "grad_norm": 2.109375, "learning_rate": 1.3898572626048656e-06, "loss": 1.6725, "step": 1815 }, { "epoch": 1.6783733826247689, "grad_norm": 2.15625, "learning_rate": 1.3820923699296485e-06, "loss": 1.7308, "step": 1816 }, { "epoch": 1.679297597042514, "grad_norm": 2.140625, "learning_rate": 1.3743476181066684e-06, "loss": 1.6168, "step": 1817 }, { "epoch": 1.6802218114602587, "grad_norm": 2.203125, "learning_rate": 1.366623025236129e-06, "loss": 1.757, "step": 1818 }, { "epoch": 1.6811460258780038, "grad_norm": 2.171875, "learning_rate": 1.3589186093711227e-06, "loss": 1.7461, "step": 1819 }, { "epoch": 1.6820702402957486, "grad_norm": 2.234375, "learning_rate": 1.351234388517586e-06, "loss": 1.696, "step": 1820 }, { "epoch": 1.6829944547134934, "grad_norm": 2.140625, "learning_rate": 1.3435703806342592e-06, "loss": 1.6843, "step": 1821 }, { "epoch": 1.6839186691312384, "grad_norm": 2.15625, "learning_rate": 1.3359266036326413e-06, "loss": 1.6991, "step": 1822 }, { "epoch": 1.6848428835489835, "grad_norm": 2.09375, "learning_rate": 1.328303075376951e-06, "loss": 1.725, "step": 1823 }, { "epoch": 1.6857670979667283, "grad_norm": 2.234375, "learning_rate": 1.3206998136840833e-06, "loss": 1.6876, "step": 1824 }, { "epoch": 1.6866913123844731, "grad_norm": 2.15625, "learning_rate": 1.313116836323568e-06, "loss": 1.7342, "step": 1825 }, { "epoch": 1.6876155268022182, "grad_norm": 2.125, "learning_rate": 1.3055541610175282e-06, "loss": 1.6485, "step": 1826 }, { "epoch": 1.6885397412199632, "grad_norm": 2.140625, "learning_rate": 1.298011805440642e-06, "loss": 1.6714, "step": 1827 }, { "epoch": 1.689463955637708, "grad_norm": 2.140625, "learning_rate": 1.290489787220096e-06, "loss": 1.6743, "step": 1828 }, { "epoch": 1.6903881700554528, "grad_norm": 2.125, "learning_rate": 1.2829881239355469e-06, "loss": 1.6445, "step": 1829 }, { "epoch": 1.6913123844731976, "grad_norm": 2.15625, "learning_rate": 1.2755068331190789e-06, "loss": 1.6244, "step": 1830 }, { "epoch": 1.6922365988909427, "grad_norm": 2.171875, "learning_rate": 1.2680459322551652e-06, "loss": 1.724, "step": 1831 }, { "epoch": 1.6931608133086877, "grad_norm": 2.109375, "learning_rate": 1.2606054387806265e-06, "loss": 1.6201, "step": 1832 }, { "epoch": 1.6940850277264325, "grad_norm": 2.171875, "learning_rate": 1.2531853700845875e-06, "loss": 1.6364, "step": 1833 }, { "epoch": 1.6950092421441774, "grad_norm": 2.125, "learning_rate": 1.245785743508441e-06, "loss": 1.6783, "step": 1834 }, { "epoch": 1.6959334565619224, "grad_norm": 2.15625, "learning_rate": 1.2384065763457997e-06, "loss": 1.6219, "step": 1835 }, { "epoch": 1.6968576709796674, "grad_norm": 2.1875, "learning_rate": 1.2310478858424656e-06, "loss": 1.7286, "step": 1836 }, { "epoch": 1.6977818853974123, "grad_norm": 2.140625, "learning_rate": 1.2237096891963862e-06, "loss": 1.682, "step": 1837 }, { "epoch": 1.698706099815157, "grad_norm": 2.140625, "learning_rate": 1.216392003557607e-06, "loss": 1.6028, "step": 1838 }, { "epoch": 1.699630314232902, "grad_norm": 2.21875, "learning_rate": 1.2090948460282415e-06, "loss": 1.681, "step": 1839 }, { "epoch": 1.700554528650647, "grad_norm": 2.140625, "learning_rate": 1.2018182336624272e-06, "loss": 1.6552, "step": 1840 }, { "epoch": 1.701478743068392, "grad_norm": 2.1875, "learning_rate": 1.194562183466279e-06, "loss": 1.6955, "step": 1841 }, { "epoch": 1.7024029574861368, "grad_norm": 2.203125, "learning_rate": 1.1873267123978683e-06, "loss": 1.7359, "step": 1842 }, { "epoch": 1.7033271719038816, "grad_norm": 2.21875, "learning_rate": 1.1801118373671617e-06, "loss": 1.6675, "step": 1843 }, { "epoch": 1.7042513863216266, "grad_norm": 2.1875, "learning_rate": 1.1729175752359922e-06, "loss": 1.7108, "step": 1844 }, { "epoch": 1.7051756007393717, "grad_norm": 2.140625, "learning_rate": 1.1657439428180228e-06, "loss": 1.6802, "step": 1845 }, { "epoch": 1.7060998151571165, "grad_norm": 2.171875, "learning_rate": 1.1585909568786957e-06, "loss": 1.7424, "step": 1846 }, { "epoch": 1.7070240295748613, "grad_norm": 2.109375, "learning_rate": 1.1514586341352108e-06, "loss": 1.6835, "step": 1847 }, { "epoch": 1.7079482439926061, "grad_norm": 2.203125, "learning_rate": 1.1443469912564685e-06, "loss": 1.7367, "step": 1848 }, { "epoch": 1.7088724584103512, "grad_norm": 2.171875, "learning_rate": 1.1372560448630377e-06, "loss": 1.7265, "step": 1849 }, { "epoch": 1.7097966728280962, "grad_norm": 2.15625, "learning_rate": 1.130185811527119e-06, "loss": 1.7146, "step": 1850 }, { "epoch": 1.710720887245841, "grad_norm": 2.171875, "learning_rate": 1.12313630777251e-06, "loss": 1.6858, "step": 1851 }, { "epoch": 1.7116451016635859, "grad_norm": 2.171875, "learning_rate": 1.1161075500745544e-06, "loss": 1.8063, "step": 1852 }, { "epoch": 1.712569316081331, "grad_norm": 2.171875, "learning_rate": 1.1090995548601147e-06, "loss": 1.7348, "step": 1853 }, { "epoch": 1.713493530499076, "grad_norm": 2.140625, "learning_rate": 1.1021123385075261e-06, "loss": 1.696, "step": 1854 }, { "epoch": 1.7144177449168208, "grad_norm": 2.109375, "learning_rate": 1.0951459173465628e-06, "loss": 1.7036, "step": 1855 }, { "epoch": 1.7153419593345656, "grad_norm": 2.15625, "learning_rate": 1.0882003076584059e-06, "loss": 1.6283, "step": 1856 }, { "epoch": 1.7162661737523104, "grad_norm": 2.234375, "learning_rate": 1.0812755256755848e-06, "loss": 1.7004, "step": 1857 }, { "epoch": 1.7171903881700554, "grad_norm": 2.125, "learning_rate": 1.0743715875819616e-06, "loss": 1.7005, "step": 1858 }, { "epoch": 1.7181146025878005, "grad_norm": 2.140625, "learning_rate": 1.067488509512683e-06, "loss": 1.7246, "step": 1859 }, { "epoch": 1.7190388170055453, "grad_norm": 2.15625, "learning_rate": 1.060626307554139e-06, "loss": 1.6638, "step": 1860 }, { "epoch": 1.71996303142329, "grad_norm": 2.15625, "learning_rate": 1.0537849977439406e-06, "loss": 1.6188, "step": 1861 }, { "epoch": 1.7208872458410351, "grad_norm": 2.15625, "learning_rate": 1.046964596070863e-06, "loss": 1.7115, "step": 1862 }, { "epoch": 1.7218114602587802, "grad_norm": 2.109375, "learning_rate": 1.0401651184748184e-06, "loss": 1.6051, "step": 1863 }, { "epoch": 1.722735674676525, "grad_norm": 2.15625, "learning_rate": 1.0333865808468203e-06, "loss": 1.6756, "step": 1864 }, { "epoch": 1.7236598890942698, "grad_norm": 2.21875, "learning_rate": 1.0266289990289413e-06, "loss": 1.7087, "step": 1865 }, { "epoch": 1.7245841035120146, "grad_norm": 2.15625, "learning_rate": 1.0198923888142798e-06, "loss": 1.677, "step": 1866 }, { "epoch": 1.7255083179297597, "grad_norm": 2.171875, "learning_rate": 1.0131767659469206e-06, "loss": 1.6409, "step": 1867 }, { "epoch": 1.7264325323475047, "grad_norm": 2.234375, "learning_rate": 1.0064821461218976e-06, "loss": 1.7091, "step": 1868 }, { "epoch": 1.7273567467652495, "grad_norm": 2.234375, "learning_rate": 9.998085449851635e-07, "loss": 1.7075, "step": 1869 }, { "epoch": 1.7282809611829943, "grad_norm": 2.140625, "learning_rate": 9.93155978133541e-07, "loss": 1.6839, "step": 1870 }, { "epoch": 1.7292051756007394, "grad_norm": 2.109375, "learning_rate": 9.86524461114703e-07, "loss": 1.6832, "step": 1871 }, { "epoch": 1.7301293900184844, "grad_norm": 2.234375, "learning_rate": 9.799140094271198e-07, "loss": 1.6676, "step": 1872 }, { "epoch": 1.7310536044362292, "grad_norm": 2.203125, "learning_rate": 9.733246385200313e-07, "loss": 1.711, "step": 1873 }, { "epoch": 1.731977818853974, "grad_norm": 2.140625, "learning_rate": 9.66756363793413e-07, "loss": 1.6818, "step": 1874 }, { "epoch": 1.7329020332717189, "grad_norm": 2.140625, "learning_rate": 9.602092005979314e-07, "loss": 1.6639, "step": 1875 }, { "epoch": 1.733826247689464, "grad_norm": 2.15625, "learning_rate": 9.536831642349186e-07, "loss": 1.654, "step": 1876 }, { "epoch": 1.734750462107209, "grad_norm": 2.109375, "learning_rate": 9.471782699563281e-07, "loss": 1.6257, "step": 1877 }, { "epoch": 1.7356746765249538, "grad_norm": 2.15625, "learning_rate": 9.406945329647021e-07, "loss": 1.6474, "step": 1878 }, { "epoch": 1.7365988909426986, "grad_norm": 2.15625, "learning_rate": 9.342319684131396e-07, "loss": 1.6825, "step": 1879 }, { "epoch": 1.7375231053604436, "grad_norm": 2.203125, "learning_rate": 9.277905914052521e-07, "loss": 1.6631, "step": 1880 }, { "epoch": 1.7384473197781887, "grad_norm": 2.1875, "learning_rate": 9.213704169951421e-07, "loss": 1.6688, "step": 1881 }, { "epoch": 1.7393715341959335, "grad_norm": 2.171875, "learning_rate": 9.149714601873516e-07, "loss": 1.6986, "step": 1882 }, { "epoch": 1.7402957486136783, "grad_norm": 2.1875, "learning_rate": 9.085937359368402e-07, "loss": 1.7323, "step": 1883 }, { "epoch": 1.7412199630314233, "grad_norm": 2.15625, "learning_rate": 9.022372591489381e-07, "loss": 1.6647, "step": 1884 }, { "epoch": 1.7421441774491682, "grad_norm": 2.1875, "learning_rate": 8.959020446793288e-07, "loss": 1.7657, "step": 1885 }, { "epoch": 1.7430683918669132, "grad_norm": 2.1875, "learning_rate": 8.895881073339963e-07, "loss": 1.7017, "step": 1886 }, { "epoch": 1.743992606284658, "grad_norm": 2.1875, "learning_rate": 8.832954618691991e-07, "loss": 1.683, "step": 1887 }, { "epoch": 1.7449168207024028, "grad_norm": 2.234375, "learning_rate": 8.770241229914356e-07, "loss": 1.6885, "step": 1888 }, { "epoch": 1.7458410351201479, "grad_norm": 2.109375, "learning_rate": 8.70774105357407e-07, "loss": 1.6864, "step": 1889 }, { "epoch": 1.746765249537893, "grad_norm": 2.15625, "learning_rate": 8.645454235739903e-07, "loss": 1.7023, "step": 1890 }, { "epoch": 1.7476894639556377, "grad_norm": 2.09375, "learning_rate": 8.583380921981932e-07, "loss": 1.6499, "step": 1891 }, { "epoch": 1.7486136783733826, "grad_norm": 2.171875, "learning_rate": 8.521521257371245e-07, "loss": 1.6935, "step": 1892 }, { "epoch": 1.7495378927911276, "grad_norm": 2.1875, "learning_rate": 8.459875386479654e-07, "loss": 1.6654, "step": 1893 }, { "epoch": 1.7504621072088724, "grad_norm": 2.203125, "learning_rate": 8.398443453379268e-07, "loss": 1.7092, "step": 1894 }, { "epoch": 1.7513863216266174, "grad_norm": 2.09375, "learning_rate": 8.337225601642274e-07, "loss": 1.6648, "step": 1895 }, { "epoch": 1.7523105360443623, "grad_norm": 2.171875, "learning_rate": 8.276221974340481e-07, "loss": 1.7173, "step": 1896 }, { "epoch": 1.753234750462107, "grad_norm": 2.28125, "learning_rate": 8.215432714045024e-07, "loss": 1.7339, "step": 1897 }, { "epoch": 1.7541589648798521, "grad_norm": 2.265625, "learning_rate": 8.154857962826079e-07, "loss": 1.7517, "step": 1898 }, { "epoch": 1.7550831792975972, "grad_norm": 2.140625, "learning_rate": 8.09449786225247e-07, "loss": 1.6352, "step": 1899 }, { "epoch": 1.756007393715342, "grad_norm": 2.125, "learning_rate": 8.034352553391367e-07, "loss": 1.7038, "step": 1900 }, { "epoch": 1.7569316081330868, "grad_norm": 2.28125, "learning_rate": 7.974422176807961e-07, "loss": 1.7253, "step": 1901 }, { "epoch": 1.7578558225508318, "grad_norm": 2.140625, "learning_rate": 7.914706872565115e-07, "loss": 1.6413, "step": 1902 }, { "epoch": 1.7587800369685767, "grad_norm": 2.15625, "learning_rate": 7.855206780223057e-07, "loss": 1.6303, "step": 1903 }, { "epoch": 1.7597042513863217, "grad_norm": 2.171875, "learning_rate": 7.795922038839032e-07, "loss": 1.7049, "step": 1904 }, { "epoch": 1.7606284658040665, "grad_norm": 2.140625, "learning_rate": 7.736852786967019e-07, "loss": 1.6553, "step": 1905 }, { "epoch": 1.7615526802218113, "grad_norm": 2.09375, "learning_rate": 7.677999162657335e-07, "loss": 1.6725, "step": 1906 }, { "epoch": 1.7624768946395564, "grad_norm": 2.125, "learning_rate": 7.619361303456396e-07, "loss": 1.7171, "step": 1907 }, { "epoch": 1.7634011090573014, "grad_norm": 2.15625, "learning_rate": 7.560939346406326e-07, "loss": 1.6409, "step": 1908 }, { "epoch": 1.7643253234750462, "grad_norm": 2.109375, "learning_rate": 7.502733428044684e-07, "loss": 1.6762, "step": 1909 }, { "epoch": 1.765249537892791, "grad_norm": 2.1875, "learning_rate": 7.44474368440411e-07, "loss": 1.7212, "step": 1910 }, { "epoch": 1.766173752310536, "grad_norm": 2.15625, "learning_rate": 7.386970251012037e-07, "loss": 1.6699, "step": 1911 }, { "epoch": 1.7670979667282811, "grad_norm": 2.140625, "learning_rate": 7.329413262890351e-07, "loss": 1.6666, "step": 1912 }, { "epoch": 1.768022181146026, "grad_norm": 2.171875, "learning_rate": 7.27207285455509e-07, "loss": 1.7303, "step": 1913 }, { "epoch": 1.7689463955637708, "grad_norm": 2.140625, "learning_rate": 7.214949160016116e-07, "loss": 1.739, "step": 1914 }, { "epoch": 1.7698706099815156, "grad_norm": 2.1875, "learning_rate": 7.158042312776847e-07, "loss": 1.6231, "step": 1915 }, { "epoch": 1.7707948243992606, "grad_norm": 2.140625, "learning_rate": 7.101352445833865e-07, "loss": 1.7046, "step": 1916 }, { "epoch": 1.7717190388170057, "grad_norm": 2.140625, "learning_rate": 7.044879691676676e-07, "loss": 1.7177, "step": 1917 }, { "epoch": 1.7726432532347505, "grad_norm": 2.265625, "learning_rate": 6.988624182287362e-07, "loss": 1.7265, "step": 1918 }, { "epoch": 1.7735674676524953, "grad_norm": 2.109375, "learning_rate": 6.932586049140255e-07, "loss": 1.6002, "step": 1919 }, { "epoch": 1.7744916820702403, "grad_norm": 2.15625, "learning_rate": 6.876765423201715e-07, "loss": 1.6873, "step": 1920 }, { "epoch": 1.7754158964879854, "grad_norm": 2.203125, "learning_rate": 6.82116243492974e-07, "loss": 1.7209, "step": 1921 }, { "epoch": 1.7763401109057302, "grad_norm": 2.109375, "learning_rate": 6.765777214273683e-07, "loss": 1.6982, "step": 1922 }, { "epoch": 1.777264325323475, "grad_norm": 2.125, "learning_rate": 6.710609890673936e-07, "loss": 1.6647, "step": 1923 }, { "epoch": 1.7781885397412198, "grad_norm": 2.125, "learning_rate": 6.655660593061719e-07, "loss": 1.6599, "step": 1924 }, { "epoch": 1.7791127541589649, "grad_norm": 2.171875, "learning_rate": 6.600929449858629e-07, "loss": 1.731, "step": 1925 }, { "epoch": 1.78003696857671, "grad_norm": 2.171875, "learning_rate": 6.546416588976445e-07, "loss": 1.6241, "step": 1926 }, { "epoch": 1.7809611829944547, "grad_norm": 2.109375, "learning_rate": 6.492122137816792e-07, "loss": 1.6618, "step": 1927 }, { "epoch": 1.7818853974121995, "grad_norm": 2.171875, "learning_rate": 6.438046223270833e-07, "loss": 1.6418, "step": 1928 }, { "epoch": 1.7828096118299446, "grad_norm": 2.15625, "learning_rate": 6.384188971719052e-07, "loss": 1.6577, "step": 1929 }, { "epoch": 1.7837338262476896, "grad_norm": 2.09375, "learning_rate": 6.330550509030853e-07, "loss": 1.6285, "step": 1930 }, { "epoch": 1.7846580406654344, "grad_norm": 2.125, "learning_rate": 6.277130960564292e-07, "loss": 1.6872, "step": 1931 }, { "epoch": 1.7855822550831792, "grad_norm": 2.109375, "learning_rate": 6.223930451165849e-07, "loss": 1.6117, "step": 1932 }, { "epoch": 1.786506469500924, "grad_norm": 2.109375, "learning_rate": 6.170949105170043e-07, "loss": 1.6835, "step": 1933 }, { "epoch": 1.787430683918669, "grad_norm": 2.203125, "learning_rate": 6.11818704639926e-07, "loss": 1.6912, "step": 1934 }, { "epoch": 1.7883548983364141, "grad_norm": 2.109375, "learning_rate": 6.065644398163284e-07, "loss": 1.5874, "step": 1935 }, { "epoch": 1.789279112754159, "grad_norm": 2.171875, "learning_rate": 6.013321283259199e-07, "loss": 1.6781, "step": 1936 }, { "epoch": 1.7902033271719038, "grad_norm": 2.109375, "learning_rate": 5.961217823970988e-07, "loss": 1.6647, "step": 1937 }, { "epoch": 1.7911275415896488, "grad_norm": 2.125, "learning_rate": 5.909334142069278e-07, "loss": 1.5979, "step": 1938 }, { "epoch": 1.7920517560073939, "grad_norm": 2.15625, "learning_rate": 5.857670358811096e-07, "loss": 1.6654, "step": 1939 }, { "epoch": 1.7929759704251387, "grad_norm": 2.125, "learning_rate": 5.80622659493949e-07, "loss": 1.722, "step": 1940 }, { "epoch": 1.7939001848428835, "grad_norm": 2.21875, "learning_rate": 5.755002970683321e-07, "loss": 1.8237, "step": 1941 }, { "epoch": 1.7948243992606283, "grad_norm": 2.15625, "learning_rate": 5.70399960575696e-07, "loss": 1.696, "step": 1942 }, { "epoch": 1.7957486136783734, "grad_norm": 2.1875, "learning_rate": 5.653216619360047e-07, "loss": 1.6727, "step": 1943 }, { "epoch": 1.7966728280961184, "grad_norm": 2.125, "learning_rate": 5.60265413017711e-07, "loss": 1.6217, "step": 1944 }, { "epoch": 1.7975970425138632, "grad_norm": 2.171875, "learning_rate": 5.552312256377423e-07, "loss": 1.7085, "step": 1945 }, { "epoch": 1.798521256931608, "grad_norm": 2.09375, "learning_rate": 5.50219111561462e-07, "loss": 1.6472, "step": 1946 }, { "epoch": 1.799445471349353, "grad_norm": 2.15625, "learning_rate": 5.452290825026463e-07, "loss": 1.5847, "step": 1947 }, { "epoch": 1.800369685767098, "grad_norm": 2.109375, "learning_rate": 5.402611501234578e-07, "loss": 1.6924, "step": 1948 }, { "epoch": 1.801293900184843, "grad_norm": 2.171875, "learning_rate": 5.353153260344179e-07, "loss": 1.676, "step": 1949 }, { "epoch": 1.8022181146025877, "grad_norm": 2.1875, "learning_rate": 5.303916217943772e-07, "loss": 1.7218, "step": 1950 }, { "epoch": 1.8031423290203326, "grad_norm": 2.140625, "learning_rate": 5.254900489104919e-07, "loss": 1.6064, "step": 1951 }, { "epoch": 1.8040665434380776, "grad_norm": 2.109375, "learning_rate": 5.206106188381921e-07, "loss": 1.7153, "step": 1952 }, { "epoch": 1.8049907578558226, "grad_norm": 2.15625, "learning_rate": 5.157533429811612e-07, "loss": 1.6863, "step": 1953 }, { "epoch": 1.8059149722735675, "grad_norm": 2.1875, "learning_rate": 5.109182326913053e-07, "loss": 1.6782, "step": 1954 }, { "epoch": 1.8068391866913123, "grad_norm": 2.1875, "learning_rate": 5.061052992687254e-07, "loss": 1.696, "step": 1955 }, { "epoch": 1.8077634011090573, "grad_norm": 2.125, "learning_rate": 5.01314553961697e-07, "loss": 1.6038, "step": 1956 }, { "epoch": 1.8086876155268024, "grad_norm": 2.109375, "learning_rate": 4.965460079666362e-07, "loss": 1.6244, "step": 1957 }, { "epoch": 1.8096118299445472, "grad_norm": 2.078125, "learning_rate": 4.917996724280782e-07, "loss": 1.5711, "step": 1958 }, { "epoch": 1.810536044362292, "grad_norm": 2.125, "learning_rate": 4.870755584386544e-07, "loss": 1.6501, "step": 1959 }, { "epoch": 1.8114602587800368, "grad_norm": 2.140625, "learning_rate": 4.823736770390553e-07, "loss": 1.6201, "step": 1960 }, { "epoch": 1.8123844731977818, "grad_norm": 2.140625, "learning_rate": 4.776940392180185e-07, "loss": 1.6897, "step": 1961 }, { "epoch": 1.8133086876155269, "grad_norm": 2.15625, "learning_rate": 4.73036655912289e-07, "loss": 1.742, "step": 1962 }, { "epoch": 1.8142329020332717, "grad_norm": 2.15625, "learning_rate": 4.684015380066087e-07, "loss": 1.7342, "step": 1963 }, { "epoch": 1.8151571164510165, "grad_norm": 2.234375, "learning_rate": 4.6378869633367797e-07, "loss": 1.6994, "step": 1964 }, { "epoch": 1.8160813308687616, "grad_norm": 2.171875, "learning_rate": 4.5919814167413755e-07, "loss": 1.6879, "step": 1965 }, { "epoch": 1.8170055452865066, "grad_norm": 2.203125, "learning_rate": 4.5462988475654113e-07, "loss": 1.7065, "step": 1966 }, { "epoch": 1.8179297597042514, "grad_norm": 2.109375, "learning_rate": 4.500839362573284e-07, "loss": 1.7116, "step": 1967 }, { "epoch": 1.8188539741219962, "grad_norm": 2.15625, "learning_rate": 4.455603068008063e-07, "loss": 1.6573, "step": 1968 }, { "epoch": 1.8197781885397413, "grad_norm": 2.078125, "learning_rate": 4.410590069591192e-07, "loss": 1.579, "step": 1969 }, { "epoch": 1.820702402957486, "grad_norm": 2.21875, "learning_rate": 4.365800472522208e-07, "loss": 1.7174, "step": 1970 }, { "epoch": 1.8216266173752311, "grad_norm": 2.15625, "learning_rate": 4.321234381478578e-07, "loss": 1.6624, "step": 1971 }, { "epoch": 1.822550831792976, "grad_norm": 2.125, "learning_rate": 4.2768919006153876e-07, "loss": 1.7577, "step": 1972 }, { "epoch": 1.8234750462107208, "grad_norm": 2.109375, "learning_rate": 4.2327731335651824e-07, "loss": 1.6311, "step": 1973 }, { "epoch": 1.8243992606284658, "grad_norm": 2.15625, "learning_rate": 4.1888781834375947e-07, "loss": 1.6642, "step": 1974 }, { "epoch": 1.8253234750462108, "grad_norm": 2.125, "learning_rate": 4.1452071528192304e-07, "loss": 1.6249, "step": 1975 }, { "epoch": 1.8262476894639557, "grad_norm": 2.140625, "learning_rate": 4.1017601437733256e-07, "loss": 1.6727, "step": 1976 }, { "epoch": 1.8271719038817005, "grad_norm": 2.140625, "learning_rate": 4.058537257839612e-07, "loss": 1.6696, "step": 1977 }, { "epoch": 1.8280961182994455, "grad_norm": 2.171875, "learning_rate": 4.0155385960339743e-07, "loss": 1.6321, "step": 1978 }, { "epoch": 1.8290203327171903, "grad_norm": 2.140625, "learning_rate": 3.972764258848305e-07, "loss": 1.6942, "step": 1979 }, { "epoch": 1.8299445471349354, "grad_norm": 2.09375, "learning_rate": 3.930214346250205e-07, "loss": 1.6518, "step": 1980 }, { "epoch": 1.8308687615526802, "grad_norm": 2.203125, "learning_rate": 3.887888957682773e-07, "loss": 1.7098, "step": 1981 }, { "epoch": 1.831792975970425, "grad_norm": 2.15625, "learning_rate": 3.8457881920643815e-07, "loss": 1.7276, "step": 1982 }, { "epoch": 1.83271719038817, "grad_norm": 2.15625, "learning_rate": 3.8039121477884467e-07, "loss": 1.6752, "step": 1983 }, { "epoch": 1.833641404805915, "grad_norm": 2.109375, "learning_rate": 3.762260922723182e-07, "loss": 1.6225, "step": 1984 }, { "epoch": 1.83456561922366, "grad_norm": 2.15625, "learning_rate": 3.7208346142113773e-07, "loss": 1.6557, "step": 1985 }, { "epoch": 1.8354898336414047, "grad_norm": 2.21875, "learning_rate": 3.6796333190701636e-07, "loss": 1.7453, "step": 1986 }, { "epoch": 1.8364140480591498, "grad_norm": 2.125, "learning_rate": 3.638657133590817e-07, "loss": 1.6337, "step": 1987 }, { "epoch": 1.8373382624768948, "grad_norm": 2.15625, "learning_rate": 3.597906153538477e-07, "loss": 1.7055, "step": 1988 }, { "epoch": 1.8382624768946396, "grad_norm": 2.171875, "learning_rate": 3.5573804741519834e-07, "loss": 1.6566, "step": 1989 }, { "epoch": 1.8391866913123844, "grad_norm": 2.140625, "learning_rate": 3.517080190143629e-07, "loss": 1.6798, "step": 1990 }, { "epoch": 1.8401109057301293, "grad_norm": 2.125, "learning_rate": 3.477005395698929e-07, "loss": 1.6773, "step": 1991 }, { "epoch": 1.8410351201478743, "grad_norm": 2.109375, "learning_rate": 3.437156184476387e-07, "loss": 1.6994, "step": 1992 }, { "epoch": 1.8419593345656193, "grad_norm": 2.15625, "learning_rate": 3.397532649607338e-07, "loss": 1.7365, "step": 1993 }, { "epoch": 1.8428835489833642, "grad_norm": 2.1875, "learning_rate": 3.358134883695674e-07, "loss": 1.6523, "step": 1994 }, { "epoch": 1.843807763401109, "grad_norm": 2.140625, "learning_rate": 3.31896297881763e-07, "loss": 1.6887, "step": 1995 }, { "epoch": 1.844731977818854, "grad_norm": 2.09375, "learning_rate": 3.280017026521598e-07, "loss": 1.5847, "step": 1996 }, { "epoch": 1.845656192236599, "grad_norm": 2.125, "learning_rate": 3.241297117827891e-07, "loss": 1.6851, "step": 1997 }, { "epoch": 1.8465804066543439, "grad_norm": 2.09375, "learning_rate": 3.2028033432285444e-07, "loss": 1.6794, "step": 1998 }, { "epoch": 1.8475046210720887, "grad_norm": 2.140625, "learning_rate": 3.1645357926870957e-07, "loss": 1.6648, "step": 1999 }, { "epoch": 1.8484288354898335, "grad_norm": 2.15625, "learning_rate": 3.1264945556383486e-07, "loss": 1.6273, "step": 2000 }, { "epoch": 1.8493530499075785, "grad_norm": 2.21875, "learning_rate": 3.08867972098823e-07, "loss": 1.7627, "step": 2001 }, { "epoch": 1.8502772643253236, "grad_norm": 2.203125, "learning_rate": 3.0510913771135463e-07, "loss": 1.6879, "step": 2002 }, { "epoch": 1.8512014787430684, "grad_norm": 2.15625, "learning_rate": 3.0137296118617265e-07, "loss": 1.6635, "step": 2003 }, { "epoch": 1.8521256931608132, "grad_norm": 2.15625, "learning_rate": 2.976594512550723e-07, "loss": 1.6677, "step": 2004 }, { "epoch": 1.8530499075785583, "grad_norm": 2.140625, "learning_rate": 2.9396861659686916e-07, "loss": 1.672, "step": 2005 }, { "epoch": 1.8539741219963033, "grad_norm": 2.171875, "learning_rate": 2.9030046583738756e-07, "loss": 1.6957, "step": 2006 }, { "epoch": 1.854898336414048, "grad_norm": 2.171875, "learning_rate": 2.8665500754943787e-07, "loss": 1.6912, "step": 2007 }, { "epoch": 1.855822550831793, "grad_norm": 2.109375, "learning_rate": 2.830322502527971e-07, "loss": 1.6495, "step": 2008 }, { "epoch": 1.8567467652495377, "grad_norm": 2.1875, "learning_rate": 2.7943220241418376e-07, "loss": 1.7346, "step": 2009 }, { "epoch": 1.8576709796672828, "grad_norm": 2.125, "learning_rate": 2.758548724472465e-07, "loss": 1.6525, "step": 2010 }, { "epoch": 1.8585951940850278, "grad_norm": 2.234375, "learning_rate": 2.7230026871253533e-07, "loss": 1.7678, "step": 2011 }, { "epoch": 1.8595194085027726, "grad_norm": 2.15625, "learning_rate": 2.68768399517495e-07, "loss": 1.709, "step": 2012 }, { "epoch": 1.8604436229205175, "grad_norm": 2.15625, "learning_rate": 2.652592731164294e-07, "loss": 1.6727, "step": 2013 }, { "epoch": 1.8613678373382625, "grad_norm": 2.109375, "learning_rate": 2.6177289771049274e-07, "loss": 1.635, "step": 2014 }, { "epoch": 1.8622920517560075, "grad_norm": 2.171875, "learning_rate": 2.5830928144767065e-07, "loss": 1.6493, "step": 2015 }, { "epoch": 1.8632162661737524, "grad_norm": 2.140625, "learning_rate": 2.548684324227557e-07, "loss": 1.7734, "step": 2016 }, { "epoch": 1.8641404805914972, "grad_norm": 2.171875, "learning_rate": 2.5145035867733315e-07, "loss": 1.6968, "step": 2017 }, { "epoch": 1.865064695009242, "grad_norm": 2.203125, "learning_rate": 2.4805506819975955e-07, "loss": 1.7287, "step": 2018 }, { "epoch": 1.865988909426987, "grad_norm": 2.140625, "learning_rate": 2.446825689251442e-07, "loss": 1.7558, "step": 2019 }, { "epoch": 1.866913123844732, "grad_norm": 2.125, "learning_rate": 2.4133286873533114e-07, "loss": 1.6396, "step": 2020 }, { "epoch": 1.8678373382624769, "grad_norm": 2.125, "learning_rate": 2.380059754588826e-07, "loss": 1.6737, "step": 2021 }, { "epoch": 1.8687615526802217, "grad_norm": 2.125, "learning_rate": 2.347018968710568e-07, "loss": 1.6379, "step": 2022 }, { "epoch": 1.8696857670979667, "grad_norm": 2.109375, "learning_rate": 2.3142064069379466e-07, "loss": 1.5698, "step": 2023 }, { "epoch": 1.8706099815157118, "grad_norm": 2.140625, "learning_rate": 2.281622145956952e-07, "loss": 1.6499, "step": 2024 }, { "epoch": 1.8715341959334566, "grad_norm": 2.140625, "learning_rate": 2.2492662619200578e-07, "loss": 1.6965, "step": 2025 }, { "epoch": 1.8724584103512014, "grad_norm": 2.125, "learning_rate": 2.217138830445942e-07, "loss": 1.7385, "step": 2026 }, { "epoch": 1.8733826247689462, "grad_norm": 2.140625, "learning_rate": 2.1852399266194312e-07, "loss": 1.6848, "step": 2027 }, { "epoch": 1.8743068391866913, "grad_norm": 2.140625, "learning_rate": 2.1535696249912362e-07, "loss": 1.672, "step": 2028 }, { "epoch": 1.8752310536044363, "grad_norm": 2.1875, "learning_rate": 2.1221279995777833e-07, "loss": 1.6331, "step": 2029 }, { "epoch": 1.8761552680221811, "grad_norm": 2.1875, "learning_rate": 2.0909151238610703e-07, "loss": 1.7452, "step": 2030 }, { "epoch": 1.877079482439926, "grad_norm": 2.203125, "learning_rate": 2.0599310707885234e-07, "loss": 1.7548, "step": 2031 }, { "epoch": 1.878003696857671, "grad_norm": 2.125, "learning_rate": 2.0291759127727294e-07, "loss": 1.6543, "step": 2032 }, { "epoch": 1.878927911275416, "grad_norm": 2.140625, "learning_rate": 1.9986497216913702e-07, "loss": 1.6178, "step": 2033 }, { "epoch": 1.8798521256931608, "grad_norm": 2.171875, "learning_rate": 1.9683525688869776e-07, "loss": 1.6636, "step": 2034 }, { "epoch": 1.8807763401109057, "grad_norm": 2.125, "learning_rate": 1.9382845251668335e-07, "loss": 1.699, "step": 2035 }, { "epoch": 1.8817005545286505, "grad_norm": 2.140625, "learning_rate": 1.90844566080276e-07, "loss": 1.7252, "step": 2036 }, { "epoch": 1.8826247689463955, "grad_norm": 2.203125, "learning_rate": 1.8788360455309519e-07, "loss": 1.7678, "step": 2037 }, { "epoch": 1.8835489833641406, "grad_norm": 2.109375, "learning_rate": 1.8494557485518427e-07, "loss": 1.6532, "step": 2038 }, { "epoch": 1.8844731977818854, "grad_norm": 2.25, "learning_rate": 1.8203048385299183e-07, "loss": 1.7097, "step": 2039 }, { "epoch": 1.8853974121996302, "grad_norm": 2.15625, "learning_rate": 1.791383383593548e-07, "loss": 1.728, "step": 2040 }, { "epoch": 1.8863216266173752, "grad_norm": 2.15625, "learning_rate": 1.7626914513348858e-07, "loss": 1.7252, "step": 2041 }, { "epoch": 1.8872458410351203, "grad_norm": 2.09375, "learning_rate": 1.7342291088096484e-07, "loss": 1.6997, "step": 2042 }, { "epoch": 1.888170055452865, "grad_norm": 2.203125, "learning_rate": 1.7059964225369707e-07, "loss": 1.7681, "step": 2043 }, { "epoch": 1.88909426987061, "grad_norm": 2.109375, "learning_rate": 1.677993458499272e-07, "loss": 1.5783, "step": 2044 }, { "epoch": 1.890018484288355, "grad_norm": 2.15625, "learning_rate": 1.6502202821420677e-07, "loss": 1.7424, "step": 2045 }, { "epoch": 1.8909426987060998, "grad_norm": 2.125, "learning_rate": 1.6226769583738923e-07, "loss": 1.6392, "step": 2046 }, { "epoch": 1.8918669131238448, "grad_norm": 2.234375, "learning_rate": 1.5953635515660427e-07, "loss": 1.6274, "step": 2047 }, { "epoch": 1.8927911275415896, "grad_norm": 2.203125, "learning_rate": 1.5682801255524793e-07, "loss": 1.6437, "step": 2048 }, { "epoch": 1.8937153419593344, "grad_norm": 2.140625, "learning_rate": 1.5414267436297036e-07, "loss": 1.7061, "step": 2049 }, { "epoch": 1.8946395563770795, "grad_norm": 2.125, "learning_rate": 1.5148034685565473e-07, "loss": 1.6462, "step": 2050 }, { "epoch": 1.8955637707948245, "grad_norm": 2.140625, "learning_rate": 1.4884103625541156e-07, "loss": 1.7009, "step": 2051 }, { "epoch": 1.8964879852125693, "grad_norm": 2.125, "learning_rate": 1.462247487305535e-07, "loss": 1.6567, "step": 2052 }, { "epoch": 1.8974121996303142, "grad_norm": 2.171875, "learning_rate": 1.436314903955871e-07, "loss": 1.7031, "step": 2053 }, { "epoch": 1.8983364140480592, "grad_norm": 2.25, "learning_rate": 1.4106126731119997e-07, "loss": 1.7494, "step": 2054 }, { "epoch": 1.899260628465804, "grad_norm": 2.21875, "learning_rate": 1.385140854842426e-07, "loss": 1.6983, "step": 2055 }, { "epoch": 1.900184842883549, "grad_norm": 2.171875, "learning_rate": 1.359899508677154e-07, "loss": 1.6875, "step": 2056 }, { "epoch": 1.9011090573012939, "grad_norm": 2.140625, "learning_rate": 1.33488869360755e-07, "loss": 1.6581, "step": 2057 }, { "epoch": 1.9020332717190387, "grad_norm": 2.25, "learning_rate": 1.3101084680862464e-07, "loss": 1.6706, "step": 2058 }, { "epoch": 1.9029574861367837, "grad_norm": 2.1875, "learning_rate": 1.2855588900269057e-07, "loss": 1.6729, "step": 2059 }, { "epoch": 1.9038817005545288, "grad_norm": 2.1875, "learning_rate": 1.2612400168041773e-07, "loss": 1.6942, "step": 2060 }, { "epoch": 1.9048059149722736, "grad_norm": 2.125, "learning_rate": 1.237151905253564e-07, "loss": 1.6305, "step": 2061 }, { "epoch": 1.9057301293900184, "grad_norm": 2.15625, "learning_rate": 1.2132946116711896e-07, "loss": 1.6687, "step": 2062 }, { "epoch": 1.9066543438077634, "grad_norm": 2.21875, "learning_rate": 1.1896681918137865e-07, "loss": 1.7085, "step": 2063 }, { "epoch": 1.9075785582255085, "grad_norm": 2.125, "learning_rate": 1.1662727008984964e-07, "loss": 1.6132, "step": 2064 }, { "epoch": 1.9085027726432533, "grad_norm": 2.15625, "learning_rate": 1.1431081936027599e-07, "loss": 1.6911, "step": 2065 }, { "epoch": 1.9094269870609981, "grad_norm": 2.140625, "learning_rate": 1.1201747240641825e-07, "loss": 1.725, "step": 2066 }, { "epoch": 1.910351201478743, "grad_norm": 2.125, "learning_rate": 1.0974723458804127e-07, "loss": 1.6656, "step": 2067 }, { "epoch": 1.911275415896488, "grad_norm": 2.171875, "learning_rate": 1.0750011121090309e-07, "loss": 1.6886, "step": 2068 }, { "epoch": 1.912199630314233, "grad_norm": 2.15625, "learning_rate": 1.0527610752673944e-07, "loss": 1.7052, "step": 2069 }, { "epoch": 1.9131238447319778, "grad_norm": 2.109375, "learning_rate": 1.030752287332537e-07, "loss": 1.6537, "step": 2070 }, { "epoch": 1.9140480591497226, "grad_norm": 2.171875, "learning_rate": 1.0089747997410582e-07, "loss": 1.6729, "step": 2071 }, { "epoch": 1.9149722735674677, "grad_norm": 2.09375, "learning_rate": 9.874286633889562e-08, "loss": 1.6629, "step": 2072 }, { "epoch": 1.9158964879852127, "grad_norm": 2.09375, "learning_rate": 9.661139286315735e-08, "loss": 1.6588, "step": 2073 }, { "epoch": 1.9168207024029575, "grad_norm": 2.125, "learning_rate": 9.45030645283418e-08, "loss": 1.635, "step": 2074 }, { "epoch": 1.9177449168207024, "grad_norm": 2.171875, "learning_rate": 9.24178862618097e-08, "loss": 1.6955, "step": 2075 }, { "epoch": 1.9186691312384472, "grad_norm": 2.109375, "learning_rate": 9.035586293681509e-08, "loss": 1.6826, "step": 2076 }, { "epoch": 1.9195933456561922, "grad_norm": 2.109375, "learning_rate": 8.83169993724986e-08, "loss": 1.6287, "step": 2077 }, { "epoch": 1.9205175600739373, "grad_norm": 2.171875, "learning_rate": 8.63013003338753e-08, "loss": 1.7415, "step": 2078 }, { "epoch": 1.921441774491682, "grad_norm": 2.125, "learning_rate": 8.43087705318213e-08, "loss": 1.6821, "step": 2079 }, { "epoch": 1.922365988909427, "grad_norm": 2.234375, "learning_rate": 8.233941462306272e-08, "loss": 1.6868, "step": 2080 }, { "epoch": 1.923290203327172, "grad_norm": 2.171875, "learning_rate": 8.039323721017011e-08, "loss": 1.6659, "step": 2081 }, { "epoch": 1.924214417744917, "grad_norm": 2.171875, "learning_rate": 7.847024284153959e-08, "loss": 1.723, "step": 2082 }, { "epoch": 1.9251386321626618, "grad_norm": 2.125, "learning_rate": 7.657043601138835e-08, "loss": 1.6447, "step": 2083 }, { "epoch": 1.9260628465804066, "grad_norm": 2.171875, "learning_rate": 7.469382115974034e-08, "loss": 1.7519, "step": 2084 }, { "epoch": 1.9269870609981514, "grad_norm": 2.171875, "learning_rate": 7.284040267242054e-08, "loss": 1.6459, "step": 2085 }, { "epoch": 1.9279112754158965, "grad_norm": 2.109375, "learning_rate": 7.101018488104073e-08, "loss": 1.6554, "step": 2086 }, { "epoch": 1.9288354898336415, "grad_norm": 2.109375, "learning_rate": 6.920317206299043e-08, "loss": 1.6471, "step": 2087 }, { "epoch": 1.9297597042513863, "grad_norm": 2.109375, "learning_rate": 6.741936844142594e-08, "loss": 1.683, "step": 2088 }, { "epoch": 1.9306839186691311, "grad_norm": 2.265625, "learning_rate": 6.565877818526245e-08, "loss": 1.6961, "step": 2089 }, { "epoch": 1.9316081330868762, "grad_norm": 2.140625, "learning_rate": 6.392140540916303e-08, "loss": 1.7916, "step": 2090 }, { "epoch": 1.9325323475046212, "grad_norm": 2.1875, "learning_rate": 6.22072541735308e-08, "loss": 1.7443, "step": 2091 }, { "epoch": 1.933456561922366, "grad_norm": 2.171875, "learning_rate": 6.051632848449563e-08, "loss": 1.685, "step": 2092 }, { "epoch": 1.9343807763401109, "grad_norm": 2.21875, "learning_rate": 5.884863229390747e-08, "loss": 1.7679, "step": 2093 }, { "epoch": 1.9353049907578557, "grad_norm": 2.140625, "learning_rate": 5.720416949932861e-08, "loss": 1.6701, "step": 2094 }, { "epoch": 1.9362292051756007, "grad_norm": 2.15625, "learning_rate": 5.558294394402253e-08, "loss": 1.6348, "step": 2095 }, { "epoch": 1.9371534195933457, "grad_norm": 2.21875, "learning_rate": 5.3984959416942817e-08, "loss": 1.6908, "step": 2096 }, { "epoch": 1.9380776340110906, "grad_norm": 2.125, "learning_rate": 5.2410219652729854e-08, "loss": 1.6585, "step": 2097 }, { "epoch": 1.9390018484288354, "grad_norm": 2.125, "learning_rate": 5.085872833169414e-08, "loss": 1.6678, "step": 2098 }, { "epoch": 1.9399260628465804, "grad_norm": 2.125, "learning_rate": 4.933048907981741e-08, "loss": 1.5337, "step": 2099 }, { "epoch": 1.9408502772643255, "grad_norm": 2.140625, "learning_rate": 4.782550546873599e-08, "loss": 1.7536, "step": 2100 }, { "epoch": 1.9417744916820703, "grad_norm": 2.140625, "learning_rate": 4.6343781015738555e-08, "loss": 1.6557, "step": 2101 }, { "epoch": 1.942698706099815, "grad_norm": 2.171875, "learning_rate": 4.488531918375061e-08, "loss": 1.718, "step": 2102 }, { "epoch": 1.94362292051756, "grad_norm": 2.125, "learning_rate": 4.3450123381334476e-08, "loss": 1.6587, "step": 2103 }, { "epoch": 1.944547134935305, "grad_norm": 2.1875, "learning_rate": 4.203819696267486e-08, "loss": 1.647, "step": 2104 }, { "epoch": 1.94547134935305, "grad_norm": 2.109375, "learning_rate": 4.064954322757775e-08, "loss": 1.6272, "step": 2105 }, { "epoch": 1.9463955637707948, "grad_norm": 2.328125, "learning_rate": 3.9284165421454854e-08, "loss": 1.6002, "step": 2106 }, { "epoch": 1.9473197781885396, "grad_norm": 2.171875, "learning_rate": 3.7942066735321416e-08, "loss": 1.7355, "step": 2107 }, { "epoch": 1.9482439926062847, "grad_norm": 2.15625, "learning_rate": 3.6623250305786177e-08, "loss": 1.7497, "step": 2108 }, { "epoch": 1.9491682070240297, "grad_norm": 2.140625, "learning_rate": 3.5327719215046965e-08, "loss": 1.6461, "step": 2109 }, { "epoch": 1.9500924214417745, "grad_norm": 2.15625, "learning_rate": 3.405547649087959e-08, "loss": 1.6912, "step": 2110 }, { "epoch": 1.9510166358595193, "grad_norm": 2.140625, "learning_rate": 3.2806525106634514e-08, "loss": 1.6582, "step": 2111 }, { "epoch": 1.9519408502772642, "grad_norm": 2.234375, "learning_rate": 3.158086798122684e-08, "loss": 1.6888, "step": 2112 }, { "epoch": 1.9528650646950092, "grad_norm": 2.125, "learning_rate": 3.0378507979131886e-08, "loss": 1.6608, "step": 2113 }, { "epoch": 1.9537892791127542, "grad_norm": 2.15625, "learning_rate": 2.919944791037632e-08, "loss": 1.686, "step": 2114 }, { "epoch": 1.954713493530499, "grad_norm": 2.203125, "learning_rate": 2.8043690530532575e-08, "loss": 1.6597, "step": 2115 }, { "epoch": 1.9556377079482439, "grad_norm": 2.140625, "learning_rate": 2.6911238540715534e-08, "loss": 1.6216, "step": 2116 }, { "epoch": 1.956561922365989, "grad_norm": 2.171875, "learning_rate": 2.5802094587569215e-08, "loss": 1.7193, "step": 2117 }, { "epoch": 1.957486136783734, "grad_norm": 2.109375, "learning_rate": 2.471626126326565e-08, "loss": 1.7053, "step": 2118 }, { "epoch": 1.9584103512014788, "grad_norm": 2.15625, "learning_rate": 2.3653741105499338e-08, "loss": 1.6858, "step": 2119 }, { "epoch": 1.9593345656192236, "grad_norm": 2.140625, "learning_rate": 2.2614536597479474e-08, "loss": 1.6537, "step": 2120 }, { "epoch": 1.9602587800369686, "grad_norm": 2.109375, "learning_rate": 2.1598650167923284e-08, "loss": 1.6271, "step": 2121 }, { "epoch": 1.9611829944547134, "grad_norm": 2.140625, "learning_rate": 2.0606084191050478e-08, "loss": 1.7058, "step": 2122 }, { "epoch": 1.9621072088724585, "grad_norm": 2.125, "learning_rate": 1.963684098658103e-08, "loss": 1.7061, "step": 2123 }, { "epoch": 1.9630314232902033, "grad_norm": 2.21875, "learning_rate": 1.8690922819727398e-08, "loss": 1.6824, "step": 2124 }, { "epoch": 1.9639556377079481, "grad_norm": 2.09375, "learning_rate": 1.7768331901187873e-08, "loss": 1.5908, "step": 2125 }, { "epoch": 1.9648798521256932, "grad_norm": 2.171875, "learning_rate": 1.686907038714325e-08, "loss": 1.6579, "step": 2126 }, { "epoch": 1.9658040665434382, "grad_norm": 2.09375, "learning_rate": 1.599314037925015e-08, "loss": 1.6318, "step": 2127 }, { "epoch": 1.966728280961183, "grad_norm": 2.109375, "learning_rate": 1.514054392463993e-08, "loss": 1.6805, "step": 2128 }, { "epoch": 1.9676524953789278, "grad_norm": 2.140625, "learning_rate": 1.4311283015910893e-08, "loss": 1.7008, "step": 2129 }, { "epoch": 1.9685767097966729, "grad_norm": 2.171875, "learning_rate": 1.3505359591120537e-08, "loss": 1.6749, "step": 2130 }, { "epoch": 1.9695009242144177, "grad_norm": 2.171875, "learning_rate": 1.2722775533787758e-08, "loss": 1.692, "step": 2131 }, { "epoch": 1.9704251386321627, "grad_norm": 2.171875, "learning_rate": 1.1963532672885081e-08, "loss": 1.6907, "step": 2132 }, { "epoch": 1.9713493530499075, "grad_norm": 2.15625, "learning_rate": 1.122763278283312e-08, "loss": 1.7365, "step": 2133 }, { "epoch": 1.9722735674676524, "grad_norm": 2.15625, "learning_rate": 1.0515077583498346e-08, "loss": 1.7309, "step": 2134 }, { "epoch": 1.9731977818853974, "grad_norm": 2.171875, "learning_rate": 9.825868740188649e-09, "loss": 1.6991, "step": 2135 }, { "epoch": 1.9741219963031424, "grad_norm": 2.109375, "learning_rate": 9.160007863650011e-09, "loss": 1.6802, "step": 2136 }, { "epoch": 1.9750462107208873, "grad_norm": 2.1875, "learning_rate": 8.517496510059842e-09, "loss": 1.6736, "step": 2137 }, { "epoch": 1.975970425138632, "grad_norm": 2.109375, "learning_rate": 7.898336181028088e-09, "loss": 1.6542, "step": 2138 }, { "epoch": 1.9768946395563771, "grad_norm": 2.203125, "learning_rate": 7.302528323589464e-09, "loss": 1.7204, "step": 2139 }, { "epoch": 1.9778188539741222, "grad_norm": 2.15625, "learning_rate": 6.730074330203451e-09, "loss": 1.6704, "step": 2140 }, { "epoch": 1.978743068391867, "grad_norm": 2.125, "learning_rate": 6.180975538746525e-09, "loss": 1.7195, "step": 2141 }, { "epoch": 1.9796672828096118, "grad_norm": 2.140625, "learning_rate": 5.655233232515489e-09, "loss": 1.6639, "step": 2142 }, { "epoch": 1.9805914972273566, "grad_norm": 2.109375, "learning_rate": 5.152848640218588e-09, "loss": 1.6826, "step": 2143 }, { "epoch": 1.9815157116451017, "grad_norm": 2.15625, "learning_rate": 4.673822935973293e-09, "loss": 1.6826, "step": 2144 }, { "epoch": 1.9824399260628467, "grad_norm": 2.109375, "learning_rate": 4.2181572393107385e-09, "loss": 1.5905, "step": 2145 }, { "epoch": 1.9833641404805915, "grad_norm": 2.359375, "learning_rate": 3.785852615161289e-09, "loss": 1.7413, "step": 2146 }, { "epoch": 1.9842883548983363, "grad_norm": 2.09375, "learning_rate": 3.376910073862316e-09, "loss": 1.6407, "step": 2147 }, { "epoch": 1.9852125693160814, "grad_norm": 2.15625, "learning_rate": 2.9913305711504193e-09, "loss": 1.6535, "step": 2148 }, { "epoch": 1.9861367837338264, "grad_norm": 2.140625, "learning_rate": 2.629115008160321e-09, "loss": 1.6787, "step": 2149 }, { "epoch": 1.9870609981515712, "grad_norm": 2.15625, "learning_rate": 2.2902642314248657e-09, "loss": 1.6507, "step": 2150 }, { "epoch": 1.987985212569316, "grad_norm": 2.140625, "learning_rate": 1.974779032868357e-09, "loss": 1.6353, "step": 2151 }, { "epoch": 1.9889094269870609, "grad_norm": 2.125, "learning_rate": 1.6826601498098894e-09, "loss": 1.6357, "step": 2152 }, { "epoch": 1.989833641404806, "grad_norm": 2.171875, "learning_rate": 1.4139082649577974e-09, "loss": 1.7587, "step": 2153 }, { "epoch": 1.990757855822551, "grad_norm": 2.140625, "learning_rate": 1.168524006410765e-09, "loss": 1.6195, "step": 2154 }, { "epoch": 1.9916820702402958, "grad_norm": 2.140625, "learning_rate": 9.46507947655606e-10, "loss": 1.6284, "step": 2155 }, { "epoch": 1.9926062846580406, "grad_norm": 2.15625, "learning_rate": 7.478606075617123e-10, "loss": 1.7714, "step": 2156 }, { "epoch": 1.9935304990757856, "grad_norm": 2.171875, "learning_rate": 5.725824503888255e-10, "loss": 1.695, "step": 2157 }, { "epoch": 1.9944547134935307, "grad_norm": 2.265625, "learning_rate": 4.206738857781556e-10, "loss": 1.7907, "step": 2158 }, { "epoch": 1.9953789279112755, "grad_norm": 2.203125, "learning_rate": 2.9213526875349063e-10, "loss": 1.6692, "step": 2159 }, { "epoch": 1.9963031423290203, "grad_norm": 2.140625, "learning_rate": 1.869668997211971e-10, "loss": 1.6791, "step": 2160 }, { "epoch": 1.997227356746765, "grad_norm": 2.125, "learning_rate": 1.0516902447132993e-10, "loss": 1.6287, "step": 2161 }, { "epoch": 1.9981515711645101, "grad_norm": 2.21875, "learning_rate": 4.674183416986111e-11, "loss": 1.6804, "step": 2162 }, { "epoch": 1.9990757855822552, "grad_norm": 2.28125, "learning_rate": 1.1685465369781768e-11, "loss": 1.7482, "step": 2163 }, { "epoch": 2.0, "grad_norm": 2.453125, "learning_rate": 0.0, "loss": 1.739, "step": 2164 } ], "logging_steps": 1, "max_steps": 2164, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.810202513587569e+17, "train_batch_size": 6, "trial_name": null, "trial_params": null }