{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9996843204390374, "eval_steps": 500, "global_step": 2573, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003885286904154829, "grad_norm": 1.0859375, "learning_rate": 7.751937984496125e-07, "loss": 2.7508, "step": 1 }, { "epoch": 0.0007770573808309658, "grad_norm": 0.875, "learning_rate": 1.550387596899225e-06, "loss": 2.6944, "step": 2 }, { "epoch": 0.0011655860712464486, "grad_norm": 0.94921875, "learning_rate": 2.325581395348837e-06, "loss": 2.6677, "step": 3 }, { "epoch": 0.0015541147616619315, "grad_norm": 0.89453125, "learning_rate": 3.10077519379845e-06, "loss": 2.6969, "step": 4 }, { "epoch": 0.0019426434520774143, "grad_norm": 0.81640625, "learning_rate": 3.875968992248062e-06, "loss": 2.6918, "step": 5 }, { "epoch": 0.002331172142492897, "grad_norm": 0.8203125, "learning_rate": 4.651162790697674e-06, "loss": 2.5546, "step": 6 }, { "epoch": 0.00271970083290838, "grad_norm": 0.984375, "learning_rate": 5.426356589147287e-06, "loss": 2.6413, "step": 7 }, { "epoch": 0.003108229523323863, "grad_norm": 0.9453125, "learning_rate": 6.2015503875969e-06, "loss": 2.7262, "step": 8 }, { "epoch": 0.0034967582137393457, "grad_norm": 0.85546875, "learning_rate": 6.976744186046512e-06, "loss": 2.7526, "step": 9 }, { "epoch": 0.0038852869041548286, "grad_norm": 0.82421875, "learning_rate": 7.751937984496124e-06, "loss": 2.7219, "step": 10 }, { "epoch": 0.004273815594570312, "grad_norm": 0.8359375, "learning_rate": 8.527131782945736e-06, "loss": 2.7289, "step": 11 }, { "epoch": 0.004662344284985794, "grad_norm": 0.87109375, "learning_rate": 9.302325581395349e-06, "loss": 2.6555, "step": 12 }, { "epoch": 0.005050872975401278, "grad_norm": 0.79296875, "learning_rate": 1.0077519379844961e-05, "loss": 2.6741, "step": 13 }, { "epoch": 0.00543940166581676, "grad_norm": 0.84375, "learning_rate": 1.0852713178294575e-05, "loss": 2.6498, "step": 14 }, { "epoch": 0.005827930356232243, "grad_norm": 0.87109375, "learning_rate": 1.1627906976744187e-05, "loss": 2.7851, "step": 15 }, { "epoch": 0.006216459046647726, "grad_norm": 0.828125, "learning_rate": 1.24031007751938e-05, "loss": 2.8024, "step": 16 }, { "epoch": 0.006604987737063209, "grad_norm": 0.92578125, "learning_rate": 1.3178294573643413e-05, "loss": 2.7433, "step": 17 }, { "epoch": 0.006993516427478691, "grad_norm": 0.91796875, "learning_rate": 1.3953488372093024e-05, "loss": 2.687, "step": 18 }, { "epoch": 0.007382045117894175, "grad_norm": 0.87109375, "learning_rate": 1.4728682170542638e-05, "loss": 2.7203, "step": 19 }, { "epoch": 0.007770573808309657, "grad_norm": 0.96484375, "learning_rate": 1.5503875968992248e-05, "loss": 2.6724, "step": 20 }, { "epoch": 0.00815910249872514, "grad_norm": 0.8203125, "learning_rate": 1.6279069767441862e-05, "loss": 2.7522, "step": 21 }, { "epoch": 0.008547631189140623, "grad_norm": 0.87890625, "learning_rate": 1.7054263565891473e-05, "loss": 2.692, "step": 22 }, { "epoch": 0.008936159879556106, "grad_norm": 0.8125, "learning_rate": 1.7829457364341087e-05, "loss": 2.6184, "step": 23 }, { "epoch": 0.009324688569971588, "grad_norm": 0.7890625, "learning_rate": 1.8604651162790697e-05, "loss": 2.6716, "step": 24 }, { "epoch": 0.009713217260387071, "grad_norm": 0.81640625, "learning_rate": 1.937984496124031e-05, "loss": 2.689, "step": 25 }, { "epoch": 0.010101745950802555, "grad_norm": 0.81640625, "learning_rate": 2.0155038759689922e-05, "loss": 2.623, "step": 26 }, { "epoch": 0.010490274641218038, "grad_norm": 0.828125, "learning_rate": 2.0930232558139536e-05, "loss": 2.711, "step": 27 }, { "epoch": 0.01087880333163352, "grad_norm": 0.8125, "learning_rate": 2.170542635658915e-05, "loss": 2.7812, "step": 28 }, { "epoch": 0.011267332022049003, "grad_norm": 0.8046875, "learning_rate": 2.2480620155038764e-05, "loss": 2.7191, "step": 29 }, { "epoch": 0.011655860712464486, "grad_norm": 0.79296875, "learning_rate": 2.3255813953488374e-05, "loss": 2.7255, "step": 30 }, { "epoch": 0.012044389402879968, "grad_norm": 0.84375, "learning_rate": 2.4031007751937988e-05, "loss": 2.6695, "step": 31 }, { "epoch": 0.012432918093295452, "grad_norm": 0.82421875, "learning_rate": 2.48062015503876e-05, "loss": 2.7645, "step": 32 }, { "epoch": 0.012821446783710935, "grad_norm": 0.8046875, "learning_rate": 2.5581395348837212e-05, "loss": 2.7689, "step": 33 }, { "epoch": 0.013209975474126417, "grad_norm": 0.8125, "learning_rate": 2.6356589147286826e-05, "loss": 2.7137, "step": 34 }, { "epoch": 0.0135985041645419, "grad_norm": 0.83984375, "learning_rate": 2.7131782945736434e-05, "loss": 2.6915, "step": 35 }, { "epoch": 0.013987032854957383, "grad_norm": 0.76953125, "learning_rate": 2.7906976744186048e-05, "loss": 2.6823, "step": 36 }, { "epoch": 0.014375561545372867, "grad_norm": 0.796875, "learning_rate": 2.868217054263566e-05, "loss": 2.7233, "step": 37 }, { "epoch": 0.01476409023578835, "grad_norm": 0.77734375, "learning_rate": 2.9457364341085275e-05, "loss": 2.7272, "step": 38 }, { "epoch": 0.015152618926203832, "grad_norm": 0.8125, "learning_rate": 3.0232558139534883e-05, "loss": 2.7218, "step": 39 }, { "epoch": 0.015541147616619315, "grad_norm": 0.80078125, "learning_rate": 3.1007751937984497e-05, "loss": 2.7278, "step": 40 }, { "epoch": 0.015929676307034797, "grad_norm": 0.828125, "learning_rate": 3.1782945736434114e-05, "loss": 2.7345, "step": 41 }, { "epoch": 0.01631820499745028, "grad_norm": 0.8125, "learning_rate": 3.2558139534883724e-05, "loss": 2.6648, "step": 42 }, { "epoch": 0.016706733687865762, "grad_norm": 0.86328125, "learning_rate": 3.3333333333333335e-05, "loss": 2.6722, "step": 43 }, { "epoch": 0.017095262378281247, "grad_norm": 0.7734375, "learning_rate": 3.4108527131782945e-05, "loss": 2.7215, "step": 44 }, { "epoch": 0.017483791068696727, "grad_norm": 0.80859375, "learning_rate": 3.488372093023256e-05, "loss": 2.7325, "step": 45 }, { "epoch": 0.01787231975911221, "grad_norm": 0.8359375, "learning_rate": 3.565891472868217e-05, "loss": 2.7248, "step": 46 }, { "epoch": 0.018260848449527696, "grad_norm": 0.81640625, "learning_rate": 3.6434108527131784e-05, "loss": 2.6442, "step": 47 }, { "epoch": 0.018649377139943177, "grad_norm": 0.75390625, "learning_rate": 3.7209302325581394e-05, "loss": 2.6807, "step": 48 }, { "epoch": 0.01903790583035866, "grad_norm": 0.75, "learning_rate": 3.798449612403101e-05, "loss": 2.5396, "step": 49 }, { "epoch": 0.019426434520774142, "grad_norm": 0.8359375, "learning_rate": 3.875968992248062e-05, "loss": 2.6923, "step": 50 }, { "epoch": 0.019814963211189626, "grad_norm": 0.796875, "learning_rate": 3.953488372093023e-05, "loss": 2.5531, "step": 51 }, { "epoch": 0.02020349190160511, "grad_norm": 0.77734375, "learning_rate": 4.0310077519379843e-05, "loss": 2.6413, "step": 52 }, { "epoch": 0.02059202059202059, "grad_norm": 0.79296875, "learning_rate": 4.108527131782946e-05, "loss": 2.6221, "step": 53 }, { "epoch": 0.020980549282436076, "grad_norm": 0.78515625, "learning_rate": 4.186046511627907e-05, "loss": 2.6596, "step": 54 }, { "epoch": 0.021369077972851556, "grad_norm": 0.76953125, "learning_rate": 4.263565891472868e-05, "loss": 2.6862, "step": 55 }, { "epoch": 0.02175760666326704, "grad_norm": 0.78125, "learning_rate": 4.34108527131783e-05, "loss": 2.7227, "step": 56 }, { "epoch": 0.022146135353682525, "grad_norm": 0.7890625, "learning_rate": 4.418604651162791e-05, "loss": 2.7295, "step": 57 }, { "epoch": 0.022534664044098006, "grad_norm": 0.7578125, "learning_rate": 4.496124031007753e-05, "loss": 2.7031, "step": 58 }, { "epoch": 0.02292319273451349, "grad_norm": 0.87890625, "learning_rate": 4.573643410852713e-05, "loss": 2.7625, "step": 59 }, { "epoch": 0.02331172142492897, "grad_norm": 0.76953125, "learning_rate": 4.651162790697675e-05, "loss": 2.7127, "step": 60 }, { "epoch": 0.023700250115344455, "grad_norm": 0.80078125, "learning_rate": 4.728682170542636e-05, "loss": 2.6414, "step": 61 }, { "epoch": 0.024088778805759936, "grad_norm": 0.77734375, "learning_rate": 4.8062015503875976e-05, "loss": 2.6865, "step": 62 }, { "epoch": 0.02447730749617542, "grad_norm": 0.82421875, "learning_rate": 4.883720930232558e-05, "loss": 2.6609, "step": 63 }, { "epoch": 0.024865836186590905, "grad_norm": 0.859375, "learning_rate": 4.96124031007752e-05, "loss": 2.7198, "step": 64 }, { "epoch": 0.025254364877006386, "grad_norm": 0.8125, "learning_rate": 5.038759689922481e-05, "loss": 2.6678, "step": 65 }, { "epoch": 0.02564289356742187, "grad_norm": 0.7578125, "learning_rate": 5.1162790697674425e-05, "loss": 2.7294, "step": 66 }, { "epoch": 0.02603142225783735, "grad_norm": 0.69140625, "learning_rate": 5.1937984496124036e-05, "loss": 2.6087, "step": 67 }, { "epoch": 0.026419950948252835, "grad_norm": 0.75390625, "learning_rate": 5.271317829457365e-05, "loss": 2.6123, "step": 68 }, { "epoch": 0.02680847963866832, "grad_norm": 0.76171875, "learning_rate": 5.348837209302326e-05, "loss": 2.6227, "step": 69 }, { "epoch": 0.0271970083290838, "grad_norm": 0.80859375, "learning_rate": 5.426356589147287e-05, "loss": 2.6388, "step": 70 }, { "epoch": 0.027585537019499284, "grad_norm": 0.79296875, "learning_rate": 5.503875968992248e-05, "loss": 2.6372, "step": 71 }, { "epoch": 0.027974065709914765, "grad_norm": 0.7578125, "learning_rate": 5.5813953488372095e-05, "loss": 2.6499, "step": 72 }, { "epoch": 0.02836259440033025, "grad_norm": 0.79296875, "learning_rate": 5.6589147286821706e-05, "loss": 2.6805, "step": 73 }, { "epoch": 0.028751123090745734, "grad_norm": 0.78125, "learning_rate": 5.736434108527132e-05, "loss": 2.646, "step": 74 }, { "epoch": 0.029139651781161215, "grad_norm": 0.83984375, "learning_rate": 5.8139534883720933e-05, "loss": 2.6443, "step": 75 }, { "epoch": 0.0295281804715767, "grad_norm": 0.8515625, "learning_rate": 5.891472868217055e-05, "loss": 2.7152, "step": 76 }, { "epoch": 0.02991670916199218, "grad_norm": 0.84375, "learning_rate": 5.9689922480620155e-05, "loss": 2.6011, "step": 77 }, { "epoch": 0.030305237852407664, "grad_norm": 0.84765625, "learning_rate": 6.0465116279069765e-05, "loss": 2.5998, "step": 78 }, { "epoch": 0.030693766542823145, "grad_norm": 0.8671875, "learning_rate": 6.124031007751938e-05, "loss": 2.6207, "step": 79 }, { "epoch": 0.03108229523323863, "grad_norm": 0.75390625, "learning_rate": 6.201550387596899e-05, "loss": 2.616, "step": 80 }, { "epoch": 0.03147082392365411, "grad_norm": 0.796875, "learning_rate": 6.27906976744186e-05, "loss": 2.6722, "step": 81 }, { "epoch": 0.031859352614069594, "grad_norm": 0.78125, "learning_rate": 6.356589147286823e-05, "loss": 2.5915, "step": 82 }, { "epoch": 0.032247881304485075, "grad_norm": 0.80078125, "learning_rate": 6.434108527131784e-05, "loss": 2.6935, "step": 83 }, { "epoch": 0.03263640999490056, "grad_norm": 0.80859375, "learning_rate": 6.511627906976745e-05, "loss": 2.5366, "step": 84 }, { "epoch": 0.033024938685316044, "grad_norm": 0.70703125, "learning_rate": 6.589147286821705e-05, "loss": 2.5981, "step": 85 }, { "epoch": 0.033413467375731525, "grad_norm": 0.765625, "learning_rate": 6.666666666666667e-05, "loss": 2.598, "step": 86 }, { "epoch": 0.03380199606614701, "grad_norm": 0.87109375, "learning_rate": 6.744186046511628e-05, "loss": 2.7113, "step": 87 }, { "epoch": 0.03419052475656249, "grad_norm": 0.83984375, "learning_rate": 6.821705426356589e-05, "loss": 2.6221, "step": 88 }, { "epoch": 0.034579053446977974, "grad_norm": 0.7890625, "learning_rate": 6.89922480620155e-05, "loss": 2.5659, "step": 89 }, { "epoch": 0.034967582137393455, "grad_norm": 0.83203125, "learning_rate": 6.976744186046513e-05, "loss": 2.6699, "step": 90 }, { "epoch": 0.03535611082780894, "grad_norm": 0.77734375, "learning_rate": 7.054263565891474e-05, "loss": 2.6132, "step": 91 }, { "epoch": 0.03574463951822442, "grad_norm": 0.78515625, "learning_rate": 7.131782945736435e-05, "loss": 2.536, "step": 92 }, { "epoch": 0.036133168208639904, "grad_norm": 0.89453125, "learning_rate": 7.209302325581396e-05, "loss": 2.6658, "step": 93 }, { "epoch": 0.03652169689905539, "grad_norm": 0.8359375, "learning_rate": 7.286821705426357e-05, "loss": 2.5863, "step": 94 }, { "epoch": 0.03691022558947087, "grad_norm": 0.734375, "learning_rate": 7.364341085271318e-05, "loss": 2.5564, "step": 95 }, { "epoch": 0.037298754279886354, "grad_norm": 0.8203125, "learning_rate": 7.441860465116279e-05, "loss": 2.627, "step": 96 }, { "epoch": 0.03768728297030184, "grad_norm": 0.94921875, "learning_rate": 7.519379844961241e-05, "loss": 2.6368, "step": 97 }, { "epoch": 0.03807581166071732, "grad_norm": 0.890625, "learning_rate": 7.596899224806202e-05, "loss": 2.5973, "step": 98 }, { "epoch": 0.0384643403511328, "grad_norm": 1.96875, "learning_rate": 7.674418604651163e-05, "loss": 2.5859, "step": 99 }, { "epoch": 0.038852869041548284, "grad_norm": 0.76953125, "learning_rate": 7.751937984496124e-05, "loss": 2.6385, "step": 100 }, { "epoch": 0.03924139773196377, "grad_norm": 0.85546875, "learning_rate": 7.829457364341086e-05, "loss": 2.6456, "step": 101 }, { "epoch": 0.03962992642237925, "grad_norm": 0.75, "learning_rate": 7.906976744186047e-05, "loss": 2.5322, "step": 102 }, { "epoch": 0.04001845511279473, "grad_norm": 0.78125, "learning_rate": 7.984496124031008e-05, "loss": 2.5148, "step": 103 }, { "epoch": 0.04040698380321022, "grad_norm": 0.78125, "learning_rate": 8.062015503875969e-05, "loss": 2.5003, "step": 104 }, { "epoch": 0.0407955124936257, "grad_norm": 0.87890625, "learning_rate": 8.139534883720931e-05, "loss": 2.6288, "step": 105 }, { "epoch": 0.04118404118404118, "grad_norm": 0.73828125, "learning_rate": 8.217054263565892e-05, "loss": 2.659, "step": 106 }, { "epoch": 0.041572569874456664, "grad_norm": 0.83984375, "learning_rate": 8.294573643410853e-05, "loss": 2.5254, "step": 107 }, { "epoch": 0.04196109856487215, "grad_norm": 0.79296875, "learning_rate": 8.372093023255814e-05, "loss": 2.5423, "step": 108 }, { "epoch": 0.04234962725528763, "grad_norm": 0.7890625, "learning_rate": 8.449612403100775e-05, "loss": 2.549, "step": 109 }, { "epoch": 0.04273815594570311, "grad_norm": 0.796875, "learning_rate": 8.527131782945736e-05, "loss": 2.6419, "step": 110 }, { "epoch": 0.0431266846361186, "grad_norm": 0.8671875, "learning_rate": 8.604651162790697e-05, "loss": 2.5485, "step": 111 }, { "epoch": 0.04351521332653408, "grad_norm": 0.8515625, "learning_rate": 8.68217054263566e-05, "loss": 2.5752, "step": 112 }, { "epoch": 0.04390374201694956, "grad_norm": 0.84375, "learning_rate": 8.759689922480621e-05, "loss": 2.5693, "step": 113 }, { "epoch": 0.04429227070736505, "grad_norm": 0.828125, "learning_rate": 8.837209302325582e-05, "loss": 2.5739, "step": 114 }, { "epoch": 0.04468079939778053, "grad_norm": 0.83203125, "learning_rate": 8.914728682170543e-05, "loss": 2.5961, "step": 115 }, { "epoch": 0.04506932808819601, "grad_norm": 0.78125, "learning_rate": 8.992248062015505e-05, "loss": 2.5351, "step": 116 }, { "epoch": 0.04545785677861149, "grad_norm": 0.76953125, "learning_rate": 9.069767441860465e-05, "loss": 2.5145, "step": 117 }, { "epoch": 0.04584638546902698, "grad_norm": 0.796875, "learning_rate": 9.147286821705426e-05, "loss": 2.5982, "step": 118 }, { "epoch": 0.04623491415944246, "grad_norm": 0.75390625, "learning_rate": 9.224806201550387e-05, "loss": 2.5882, "step": 119 }, { "epoch": 0.04662344284985794, "grad_norm": 0.84765625, "learning_rate": 9.30232558139535e-05, "loss": 2.6582, "step": 120 }, { "epoch": 0.04701197154027343, "grad_norm": 0.83203125, "learning_rate": 9.379844961240311e-05, "loss": 2.5619, "step": 121 }, { "epoch": 0.04740050023068891, "grad_norm": 0.90234375, "learning_rate": 9.457364341085272e-05, "loss": 2.588, "step": 122 }, { "epoch": 0.04778902892110439, "grad_norm": 0.7734375, "learning_rate": 9.534883720930233e-05, "loss": 2.5661, "step": 123 }, { "epoch": 0.04817755761151987, "grad_norm": 0.83984375, "learning_rate": 9.612403100775195e-05, "loss": 2.5462, "step": 124 }, { "epoch": 0.04856608630193536, "grad_norm": 0.8203125, "learning_rate": 9.689922480620155e-05, "loss": 2.5859, "step": 125 }, { "epoch": 0.04895461499235084, "grad_norm": 0.89453125, "learning_rate": 9.767441860465116e-05, "loss": 2.6039, "step": 126 }, { "epoch": 0.04934314368276632, "grad_norm": 0.84765625, "learning_rate": 9.844961240310078e-05, "loss": 2.6323, "step": 127 }, { "epoch": 0.04973167237318181, "grad_norm": 0.75390625, "learning_rate": 9.92248062015504e-05, "loss": 2.5727, "step": 128 }, { "epoch": 0.05012020106359729, "grad_norm": 0.81640625, "learning_rate": 0.0001, "loss": 2.604, "step": 129 }, { "epoch": 0.05050872975401277, "grad_norm": 0.7890625, "learning_rate": 0.00010077519379844962, "loss": 2.5792, "step": 130 }, { "epoch": 0.05089725844442826, "grad_norm": 0.7265625, "learning_rate": 0.00010155038759689923, "loss": 2.5262, "step": 131 }, { "epoch": 0.05128578713484374, "grad_norm": 0.82421875, "learning_rate": 0.00010232558139534885, "loss": 2.6018, "step": 132 }, { "epoch": 0.05167431582525922, "grad_norm": 0.875, "learning_rate": 0.00010310077519379846, "loss": 2.5635, "step": 133 }, { "epoch": 0.0520628445156747, "grad_norm": 0.77734375, "learning_rate": 0.00010387596899224807, "loss": 2.5924, "step": 134 }, { "epoch": 0.05245137320609019, "grad_norm": 0.84375, "learning_rate": 0.00010465116279069768, "loss": 2.6196, "step": 135 }, { "epoch": 0.05283990189650567, "grad_norm": 0.8359375, "learning_rate": 0.0001054263565891473, "loss": 2.5211, "step": 136 }, { "epoch": 0.05322843058692115, "grad_norm": 0.78125, "learning_rate": 0.00010620155038759692, "loss": 2.5739, "step": 137 }, { "epoch": 0.05361695927733664, "grad_norm": 0.7890625, "learning_rate": 0.00010697674418604651, "loss": 2.5201, "step": 138 }, { "epoch": 0.05400548796775212, "grad_norm": 0.7578125, "learning_rate": 0.00010775193798449612, "loss": 2.5191, "step": 139 }, { "epoch": 0.0543940166581676, "grad_norm": 0.88671875, "learning_rate": 0.00010852713178294573, "loss": 2.5712, "step": 140 }, { "epoch": 0.05478254534858308, "grad_norm": 0.80859375, "learning_rate": 0.00010930232558139534, "loss": 2.5579, "step": 141 }, { "epoch": 0.05517107403899857, "grad_norm": 0.86328125, "learning_rate": 0.00011007751937984496, "loss": 2.5769, "step": 142 }, { "epoch": 0.05555960272941405, "grad_norm": 0.81640625, "learning_rate": 0.00011085271317829458, "loss": 2.5025, "step": 143 }, { "epoch": 0.05594813141982953, "grad_norm": 0.80859375, "learning_rate": 0.00011162790697674419, "loss": 2.4641, "step": 144 }, { "epoch": 0.05633666011024502, "grad_norm": 0.78125, "learning_rate": 0.0001124031007751938, "loss": 2.4801, "step": 145 }, { "epoch": 0.0567251888006605, "grad_norm": 0.8515625, "learning_rate": 0.00011317829457364341, "loss": 2.5683, "step": 146 }, { "epoch": 0.05711371749107598, "grad_norm": 0.8046875, "learning_rate": 0.00011395348837209304, "loss": 2.5542, "step": 147 }, { "epoch": 0.05750224618149147, "grad_norm": 0.796875, "learning_rate": 0.00011472868217054265, "loss": 2.4648, "step": 148 }, { "epoch": 0.05789077487190695, "grad_norm": 0.8203125, "learning_rate": 0.00011550387596899226, "loss": 2.548, "step": 149 }, { "epoch": 0.05827930356232243, "grad_norm": 0.80078125, "learning_rate": 0.00011627906976744187, "loss": 2.6214, "step": 150 }, { "epoch": 0.05866783225273791, "grad_norm": 0.859375, "learning_rate": 0.00011705426356589149, "loss": 2.5642, "step": 151 }, { "epoch": 0.0590563609431534, "grad_norm": 0.76171875, "learning_rate": 0.0001178294573643411, "loss": 2.5584, "step": 152 }, { "epoch": 0.05944488963356888, "grad_norm": 0.80859375, "learning_rate": 0.00011860465116279071, "loss": 2.5286, "step": 153 }, { "epoch": 0.05983341832398436, "grad_norm": 0.875, "learning_rate": 0.00011937984496124031, "loss": 2.6014, "step": 154 }, { "epoch": 0.06022194701439985, "grad_norm": 0.8671875, "learning_rate": 0.00012015503875968992, "loss": 2.5721, "step": 155 }, { "epoch": 0.06061047570481533, "grad_norm": 0.8359375, "learning_rate": 0.00012093023255813953, "loss": 2.5731, "step": 156 }, { "epoch": 0.06099900439523081, "grad_norm": 0.79296875, "learning_rate": 0.00012170542635658914, "loss": 2.4835, "step": 157 }, { "epoch": 0.06138753308564629, "grad_norm": 0.7890625, "learning_rate": 0.00012248062015503876, "loss": 2.5875, "step": 158 }, { "epoch": 0.06177606177606178, "grad_norm": 0.77734375, "learning_rate": 0.00012325581395348836, "loss": 2.5394, "step": 159 }, { "epoch": 0.06216459046647726, "grad_norm": 0.75390625, "learning_rate": 0.00012403100775193799, "loss": 2.5477, "step": 160 }, { "epoch": 0.06255311915689274, "grad_norm": 0.8359375, "learning_rate": 0.0001248062015503876, "loss": 2.4915, "step": 161 }, { "epoch": 0.06294164784730823, "grad_norm": 0.859375, "learning_rate": 0.0001255813953488372, "loss": 2.5073, "step": 162 }, { "epoch": 0.0633301765377237, "grad_norm": 0.9140625, "learning_rate": 0.00012635658914728683, "loss": 2.5843, "step": 163 }, { "epoch": 0.06371870522813919, "grad_norm": 0.83984375, "learning_rate": 0.00012713178294573646, "loss": 2.5689, "step": 164 }, { "epoch": 0.06410723391855468, "grad_norm": 0.796875, "learning_rate": 0.00012790697674418605, "loss": 2.5615, "step": 165 }, { "epoch": 0.06449576260897015, "grad_norm": 0.8359375, "learning_rate": 0.00012868217054263568, "loss": 2.519, "step": 166 }, { "epoch": 0.06488429129938564, "grad_norm": 0.90625, "learning_rate": 0.00012945736434108527, "loss": 2.596, "step": 167 }, { "epoch": 0.06527281998980113, "grad_norm": 0.8203125, "learning_rate": 0.0001302325581395349, "loss": 2.5081, "step": 168 }, { "epoch": 0.0656613486802166, "grad_norm": 0.80078125, "learning_rate": 0.00013100775193798452, "loss": 2.5528, "step": 169 }, { "epoch": 0.06604987737063209, "grad_norm": 0.828125, "learning_rate": 0.0001317829457364341, "loss": 2.5174, "step": 170 }, { "epoch": 0.06643840606104758, "grad_norm": 0.765625, "learning_rate": 0.00013255813953488372, "loss": 2.5159, "step": 171 }, { "epoch": 0.06682693475146305, "grad_norm": 0.78515625, "learning_rate": 0.00013333333333333334, "loss": 2.5556, "step": 172 }, { "epoch": 0.06721546344187854, "grad_norm": 0.8125, "learning_rate": 0.00013410852713178294, "loss": 2.5453, "step": 173 }, { "epoch": 0.06760399213229402, "grad_norm": 0.8046875, "learning_rate": 0.00013488372093023256, "loss": 2.5447, "step": 174 }, { "epoch": 0.0679925208227095, "grad_norm": 0.79296875, "learning_rate": 0.00013565891472868218, "loss": 2.5426, "step": 175 }, { "epoch": 0.06838104951312499, "grad_norm": 0.765625, "learning_rate": 0.00013643410852713178, "loss": 2.5165, "step": 176 }, { "epoch": 0.06876957820354047, "grad_norm": 0.85546875, "learning_rate": 0.0001372093023255814, "loss": 2.4734, "step": 177 }, { "epoch": 0.06915810689395595, "grad_norm": 0.78515625, "learning_rate": 0.000137984496124031, "loss": 2.4351, "step": 178 }, { "epoch": 0.06954663558437144, "grad_norm": 0.83203125, "learning_rate": 0.00013875968992248063, "loss": 2.5237, "step": 179 }, { "epoch": 0.06993516427478691, "grad_norm": 0.76953125, "learning_rate": 0.00013953488372093025, "loss": 2.5439, "step": 180 }, { "epoch": 0.0703236929652024, "grad_norm": 0.8203125, "learning_rate": 0.00014031007751937985, "loss": 2.4856, "step": 181 }, { "epoch": 0.07071222165561789, "grad_norm": 0.9140625, "learning_rate": 0.00014108527131782947, "loss": 2.4619, "step": 182 }, { "epoch": 0.07110075034603336, "grad_norm": 0.8046875, "learning_rate": 0.0001418604651162791, "loss": 2.5004, "step": 183 }, { "epoch": 0.07148927903644885, "grad_norm": 0.828125, "learning_rate": 0.0001426356589147287, "loss": 2.4954, "step": 184 }, { "epoch": 0.07187780772686433, "grad_norm": 0.8515625, "learning_rate": 0.00014341085271317832, "loss": 2.5676, "step": 185 }, { "epoch": 0.07226633641727981, "grad_norm": 0.86328125, "learning_rate": 0.00014418604651162791, "loss": 2.5853, "step": 186 }, { "epoch": 0.0726548651076953, "grad_norm": 0.76953125, "learning_rate": 0.0001449612403100775, "loss": 2.4586, "step": 187 }, { "epoch": 0.07304339379811078, "grad_norm": 0.9609375, "learning_rate": 0.00014573643410852714, "loss": 2.4955, "step": 188 }, { "epoch": 0.07343192248852626, "grad_norm": 0.9296875, "learning_rate": 0.00014651162790697673, "loss": 2.5761, "step": 189 }, { "epoch": 0.07382045117894175, "grad_norm": 0.796875, "learning_rate": 0.00014728682170542636, "loss": 2.5159, "step": 190 }, { "epoch": 0.07420897986935723, "grad_norm": 0.8828125, "learning_rate": 0.00014806201550387598, "loss": 2.4549, "step": 191 }, { "epoch": 0.07459750855977271, "grad_norm": 0.88671875, "learning_rate": 0.00014883720930232558, "loss": 2.4911, "step": 192 }, { "epoch": 0.0749860372501882, "grad_norm": 0.8515625, "learning_rate": 0.0001496124031007752, "loss": 2.5391, "step": 193 }, { "epoch": 0.07537456594060368, "grad_norm": 0.875, "learning_rate": 0.00015038759689922483, "loss": 2.5419, "step": 194 }, { "epoch": 0.07576309463101916, "grad_norm": 0.91796875, "learning_rate": 0.00015116279069767442, "loss": 2.5355, "step": 195 }, { "epoch": 0.07615162332143464, "grad_norm": 0.8671875, "learning_rate": 0.00015193798449612405, "loss": 2.5481, "step": 196 }, { "epoch": 0.07654015201185012, "grad_norm": 0.8359375, "learning_rate": 0.00015271317829457364, "loss": 2.5098, "step": 197 }, { "epoch": 0.0769286807022656, "grad_norm": 0.81640625, "learning_rate": 0.00015348837209302327, "loss": 2.47, "step": 198 }, { "epoch": 0.0773172093926811, "grad_norm": 0.875, "learning_rate": 0.0001542635658914729, "loss": 2.5496, "step": 199 }, { "epoch": 0.07770573808309657, "grad_norm": 0.921875, "learning_rate": 0.0001550387596899225, "loss": 2.579, "step": 200 }, { "epoch": 0.07809426677351206, "grad_norm": 0.89453125, "learning_rate": 0.0001558139534883721, "loss": 2.538, "step": 201 }, { "epoch": 0.07848279546392754, "grad_norm": 0.83203125, "learning_rate": 0.0001565891472868217, "loss": 2.4561, "step": 202 }, { "epoch": 0.07887132415434302, "grad_norm": 0.88671875, "learning_rate": 0.0001573643410852713, "loss": 2.4939, "step": 203 }, { "epoch": 0.0792598528447585, "grad_norm": 0.93359375, "learning_rate": 0.00015813953488372093, "loss": 2.5002, "step": 204 }, { "epoch": 0.07964838153517399, "grad_norm": 0.81640625, "learning_rate": 0.00015891472868217056, "loss": 2.4702, "step": 205 }, { "epoch": 0.08003691022558947, "grad_norm": 0.8125, "learning_rate": 0.00015968992248062015, "loss": 2.4821, "step": 206 }, { "epoch": 0.08042543891600495, "grad_norm": 0.94140625, "learning_rate": 0.00016046511627906978, "loss": 2.5318, "step": 207 }, { "epoch": 0.08081396760642044, "grad_norm": 0.90234375, "learning_rate": 0.00016124031007751937, "loss": 2.4792, "step": 208 }, { "epoch": 0.08120249629683592, "grad_norm": 0.78515625, "learning_rate": 0.000162015503875969, "loss": 2.432, "step": 209 }, { "epoch": 0.0815910249872514, "grad_norm": 0.90625, "learning_rate": 0.00016279069767441862, "loss": 2.5145, "step": 210 }, { "epoch": 0.08197955367766689, "grad_norm": 0.84765625, "learning_rate": 0.00016356589147286822, "loss": 2.4505, "step": 211 }, { "epoch": 0.08236808236808237, "grad_norm": 0.94921875, "learning_rate": 0.00016434108527131784, "loss": 2.5375, "step": 212 }, { "epoch": 0.08275661105849785, "grad_norm": 0.91796875, "learning_rate": 0.00016511627906976747, "loss": 2.4642, "step": 213 }, { "epoch": 0.08314513974891333, "grad_norm": 0.9140625, "learning_rate": 0.00016589147286821706, "loss": 2.5279, "step": 214 }, { "epoch": 0.08353366843932881, "grad_norm": 0.8828125, "learning_rate": 0.0001666666666666667, "loss": 2.5083, "step": 215 }, { "epoch": 0.0839221971297443, "grad_norm": 0.98046875, "learning_rate": 0.00016744186046511629, "loss": 2.5472, "step": 216 }, { "epoch": 0.08431072582015978, "grad_norm": 0.9296875, "learning_rate": 0.0001682170542635659, "loss": 2.5635, "step": 217 }, { "epoch": 0.08469925451057526, "grad_norm": 0.9296875, "learning_rate": 0.0001689922480620155, "loss": 2.4878, "step": 218 }, { "epoch": 0.08508778320099075, "grad_norm": 0.9296875, "learning_rate": 0.0001697674418604651, "loss": 2.5143, "step": 219 }, { "epoch": 0.08547631189140623, "grad_norm": 0.94921875, "learning_rate": 0.00017054263565891473, "loss": 2.5344, "step": 220 }, { "epoch": 0.08586484058182171, "grad_norm": 0.83984375, "learning_rate": 0.00017131782945736435, "loss": 2.5229, "step": 221 }, { "epoch": 0.0862533692722372, "grad_norm": 0.9375, "learning_rate": 0.00017209302325581395, "loss": 2.5182, "step": 222 }, { "epoch": 0.08664189796265268, "grad_norm": 0.91015625, "learning_rate": 0.00017286821705426357, "loss": 2.5788, "step": 223 }, { "epoch": 0.08703042665306816, "grad_norm": 0.8984375, "learning_rate": 0.0001736434108527132, "loss": 2.4586, "step": 224 }, { "epoch": 0.08741895534348365, "grad_norm": 0.984375, "learning_rate": 0.0001744186046511628, "loss": 2.4999, "step": 225 }, { "epoch": 0.08780748403389912, "grad_norm": 0.84765625, "learning_rate": 0.00017519379844961242, "loss": 2.4922, "step": 226 }, { "epoch": 0.08819601272431461, "grad_norm": 1.15625, "learning_rate": 0.00017596899224806201, "loss": 2.5033, "step": 227 }, { "epoch": 0.0885845414147301, "grad_norm": 0.953125, "learning_rate": 0.00017674418604651164, "loss": 2.5631, "step": 228 }, { "epoch": 0.08897307010514557, "grad_norm": 1.1640625, "learning_rate": 0.00017751937984496126, "loss": 2.5307, "step": 229 }, { "epoch": 0.08936159879556106, "grad_norm": 0.9375, "learning_rate": 0.00017829457364341086, "loss": 2.5699, "step": 230 }, { "epoch": 0.08975012748597654, "grad_norm": 1.0546875, "learning_rate": 0.00017906976744186048, "loss": 2.5328, "step": 231 }, { "epoch": 0.09013865617639202, "grad_norm": 0.9765625, "learning_rate": 0.0001798449612403101, "loss": 2.4664, "step": 232 }, { "epoch": 0.09052718486680751, "grad_norm": 0.9609375, "learning_rate": 0.0001806201550387597, "loss": 2.4832, "step": 233 }, { "epoch": 0.09091571355722299, "grad_norm": 0.921875, "learning_rate": 0.0001813953488372093, "loss": 2.5115, "step": 234 }, { "epoch": 0.09130424224763847, "grad_norm": 0.921875, "learning_rate": 0.00018217054263565893, "loss": 2.4564, "step": 235 }, { "epoch": 0.09169277093805396, "grad_norm": 0.9375, "learning_rate": 0.00018294573643410852, "loss": 2.4124, "step": 236 }, { "epoch": 0.09208129962846943, "grad_norm": 0.85546875, "learning_rate": 0.00018372093023255815, "loss": 2.4601, "step": 237 }, { "epoch": 0.09246982831888492, "grad_norm": 0.9296875, "learning_rate": 0.00018449612403100774, "loss": 2.5447, "step": 238 }, { "epoch": 0.09285835700930041, "grad_norm": 0.90234375, "learning_rate": 0.00018527131782945737, "loss": 2.4984, "step": 239 }, { "epoch": 0.09324688569971588, "grad_norm": 0.90234375, "learning_rate": 0.000186046511627907, "loss": 2.4448, "step": 240 }, { "epoch": 0.09363541439013137, "grad_norm": 0.90234375, "learning_rate": 0.0001868217054263566, "loss": 2.501, "step": 241 }, { "epoch": 0.09402394308054686, "grad_norm": 0.9375, "learning_rate": 0.00018759689922480621, "loss": 2.4614, "step": 242 }, { "epoch": 0.09441247177096233, "grad_norm": 0.8984375, "learning_rate": 0.00018837209302325584, "loss": 2.5024, "step": 243 }, { "epoch": 0.09480100046137782, "grad_norm": 0.87890625, "learning_rate": 0.00018914728682170543, "loss": 2.4858, "step": 244 }, { "epoch": 0.09518952915179331, "grad_norm": 1.015625, "learning_rate": 0.00018992248062015506, "loss": 2.55, "step": 245 }, { "epoch": 0.09557805784220878, "grad_norm": 0.9375, "learning_rate": 0.00019069767441860466, "loss": 2.5271, "step": 246 }, { "epoch": 0.09596658653262427, "grad_norm": 0.875, "learning_rate": 0.00019147286821705428, "loss": 2.4523, "step": 247 }, { "epoch": 0.09635511522303974, "grad_norm": 0.9140625, "learning_rate": 0.0001922480620155039, "loss": 2.4744, "step": 248 }, { "epoch": 0.09674364391345523, "grad_norm": 0.87890625, "learning_rate": 0.0001930232558139535, "loss": 2.5274, "step": 249 }, { "epoch": 0.09713217260387072, "grad_norm": 0.90234375, "learning_rate": 0.0001937984496124031, "loss": 2.5121, "step": 250 }, { "epoch": 0.0975207012942862, "grad_norm": 0.8515625, "learning_rate": 0.00019457364341085272, "loss": 2.482, "step": 251 }, { "epoch": 0.09790922998470168, "grad_norm": 0.84375, "learning_rate": 0.00019534883720930232, "loss": 2.5032, "step": 252 }, { "epoch": 0.09829775867511717, "grad_norm": 0.90234375, "learning_rate": 0.00019612403100775194, "loss": 2.5395, "step": 253 }, { "epoch": 0.09868628736553264, "grad_norm": 0.8984375, "learning_rate": 0.00019689922480620157, "loss": 2.485, "step": 254 }, { "epoch": 0.09907481605594813, "grad_norm": 0.8984375, "learning_rate": 0.00019767441860465116, "loss": 2.4981, "step": 255 }, { "epoch": 0.09946334474636362, "grad_norm": 0.94921875, "learning_rate": 0.0001984496124031008, "loss": 2.5291, "step": 256 }, { "epoch": 0.09985187343677909, "grad_norm": 0.88671875, "learning_rate": 0.00019922480620155039, "loss": 2.4618, "step": 257 }, { "epoch": 0.10024040212719458, "grad_norm": 0.87109375, "learning_rate": 0.0002, "loss": 2.4954, "step": 258 }, { "epoch": 0.10062893081761007, "grad_norm": 0.828125, "learning_rate": 0.000199999907919495, "loss": 2.5519, "step": 259 }, { "epoch": 0.10101745950802554, "grad_norm": 0.9296875, "learning_rate": 0.00019999963167814967, "loss": 2.5409, "step": 260 }, { "epoch": 0.10140598819844103, "grad_norm": 0.96875, "learning_rate": 0.0001999991712764726, "loss": 2.4918, "step": 261 }, { "epoch": 0.10179451688885652, "grad_norm": 0.96875, "learning_rate": 0.0001999985267153118, "loss": 2.5241, "step": 262 }, { "epoch": 0.10218304557927199, "grad_norm": 0.91015625, "learning_rate": 0.00019999769799585422, "loss": 2.5064, "step": 263 }, { "epoch": 0.10257157426968748, "grad_norm": 0.87109375, "learning_rate": 0.00019999668511962607, "loss": 2.5119, "step": 264 }, { "epoch": 0.10296010296010295, "grad_norm": 1.0, "learning_rate": 0.00019999548808849268, "loss": 2.4937, "step": 265 }, { "epoch": 0.10334863165051844, "grad_norm": 1.0546875, "learning_rate": 0.00019999410690465852, "loss": 2.5707, "step": 266 }, { "epoch": 0.10373716034093393, "grad_norm": 0.94140625, "learning_rate": 0.00019999254157066716, "loss": 2.458, "step": 267 }, { "epoch": 0.1041256890313494, "grad_norm": 1.0625, "learning_rate": 0.00019999079208940135, "loss": 2.5633, "step": 268 }, { "epoch": 0.10451421772176489, "grad_norm": 0.91796875, "learning_rate": 0.00019998885846408295, "loss": 2.5172, "step": 269 }, { "epoch": 0.10490274641218038, "grad_norm": 0.96484375, "learning_rate": 0.00019998674069827293, "loss": 2.531, "step": 270 }, { "epoch": 0.10529127510259585, "grad_norm": 0.8671875, "learning_rate": 0.00019998443879587144, "loss": 2.5153, "step": 271 }, { "epoch": 0.10567980379301134, "grad_norm": 0.87890625, "learning_rate": 0.00019998195276111762, "loss": 2.4719, "step": 272 }, { "epoch": 0.10606833248342683, "grad_norm": 0.859375, "learning_rate": 0.00019997928259858985, "loss": 2.4519, "step": 273 }, { "epoch": 0.1064568611738423, "grad_norm": 0.91796875, "learning_rate": 0.00019997642831320547, "loss": 2.3848, "step": 274 }, { "epoch": 0.10684538986425779, "grad_norm": 0.94921875, "learning_rate": 0.00019997338991022096, "loss": 2.4939, "step": 275 }, { "epoch": 0.10723391855467328, "grad_norm": 0.921875, "learning_rate": 0.00019997016739523192, "loss": 2.4016, "step": 276 }, { "epoch": 0.10762244724508875, "grad_norm": 0.9296875, "learning_rate": 0.00019996676077417292, "loss": 2.5356, "step": 277 }, { "epoch": 0.10801097593550424, "grad_norm": 0.91796875, "learning_rate": 0.00019996317005331767, "loss": 2.5133, "step": 278 }, { "epoch": 0.10839950462591973, "grad_norm": 0.96484375, "learning_rate": 0.00019995939523927882, "loss": 2.4938, "step": 279 }, { "epoch": 0.1087880333163352, "grad_norm": 0.87109375, "learning_rate": 0.00019995543633900817, "loss": 2.4887, "step": 280 }, { "epoch": 0.10917656200675069, "grad_norm": 0.96875, "learning_rate": 0.00019995129335979644, "loss": 2.4348, "step": 281 }, { "epoch": 0.10956509069716616, "grad_norm": 0.91796875, "learning_rate": 0.00019994696630927337, "loss": 2.4854, "step": 282 }, { "epoch": 0.10995361938758165, "grad_norm": 1.1484375, "learning_rate": 0.0001999424551954077, "loss": 2.517, "step": 283 }, { "epoch": 0.11034214807799714, "grad_norm": 0.984375, "learning_rate": 0.0001999377600265072, "loss": 2.4481, "step": 284 }, { "epoch": 0.11073067676841261, "grad_norm": 1.046875, "learning_rate": 0.00019993288081121845, "loss": 2.4257, "step": 285 }, { "epoch": 0.1111192054588281, "grad_norm": 0.91796875, "learning_rate": 0.00019992781755852712, "loss": 2.5133, "step": 286 }, { "epoch": 0.11150773414924359, "grad_norm": 0.8828125, "learning_rate": 0.00019992257027775777, "loss": 2.5131, "step": 287 }, { "epoch": 0.11189626283965906, "grad_norm": 0.953125, "learning_rate": 0.00019991713897857377, "loss": 2.5679, "step": 288 }, { "epoch": 0.11228479153007455, "grad_norm": 0.8984375, "learning_rate": 0.00019991152367097753, "loss": 2.4768, "step": 289 }, { "epoch": 0.11267332022049004, "grad_norm": 0.99609375, "learning_rate": 0.0001999057243653102, "loss": 2.4971, "step": 290 }, { "epoch": 0.11306184891090551, "grad_norm": 0.88671875, "learning_rate": 0.00019989974107225184, "loss": 2.4146, "step": 291 }, { "epoch": 0.113450377601321, "grad_norm": 0.90625, "learning_rate": 0.0001998935738028214, "loss": 2.4849, "step": 292 }, { "epoch": 0.11383890629173649, "grad_norm": 0.921875, "learning_rate": 0.00019988722256837656, "loss": 2.5275, "step": 293 }, { "epoch": 0.11422743498215196, "grad_norm": 0.9765625, "learning_rate": 0.0001998806873806138, "loss": 2.4933, "step": 294 }, { "epoch": 0.11461596367256745, "grad_norm": 0.8984375, "learning_rate": 0.00019987396825156838, "loss": 2.4731, "step": 295 }, { "epoch": 0.11500449236298294, "grad_norm": 0.8984375, "learning_rate": 0.00019986706519361434, "loss": 2.489, "step": 296 }, { "epoch": 0.11539302105339841, "grad_norm": 0.9453125, "learning_rate": 0.00019985997821946443, "loss": 2.4249, "step": 297 }, { "epoch": 0.1157815497438139, "grad_norm": 0.91015625, "learning_rate": 0.00019985270734217006, "loss": 2.4817, "step": 298 }, { "epoch": 0.11617007843422937, "grad_norm": 1.0546875, "learning_rate": 0.00019984525257512142, "loss": 2.5107, "step": 299 }, { "epoch": 0.11655860712464486, "grad_norm": 0.953125, "learning_rate": 0.00019983761393204717, "loss": 2.5681, "step": 300 }, { "epoch": 0.11694713581506035, "grad_norm": 0.9609375, "learning_rate": 0.0001998297914270148, "loss": 2.5334, "step": 301 }, { "epoch": 0.11733566450547582, "grad_norm": 0.9453125, "learning_rate": 0.00019982178507443028, "loss": 2.4393, "step": 302 }, { "epoch": 0.11772419319589131, "grad_norm": 0.9140625, "learning_rate": 0.0001998135948890382, "loss": 2.4662, "step": 303 }, { "epoch": 0.1181127218863068, "grad_norm": 0.875, "learning_rate": 0.0001998052208859217, "loss": 2.4961, "step": 304 }, { "epoch": 0.11850125057672227, "grad_norm": 0.90234375, "learning_rate": 0.00019979666308050242, "loss": 2.4746, "step": 305 }, { "epoch": 0.11888977926713776, "grad_norm": 1.015625, "learning_rate": 0.0001997879214885405, "loss": 2.508, "step": 306 }, { "epoch": 0.11927830795755325, "grad_norm": 0.83203125, "learning_rate": 0.0001997789961261345, "loss": 2.4174, "step": 307 }, { "epoch": 0.11966683664796872, "grad_norm": 1.0078125, "learning_rate": 0.00019976988700972154, "loss": 2.567, "step": 308 }, { "epoch": 0.1200553653383842, "grad_norm": 0.875, "learning_rate": 0.00019976059415607698, "loss": 2.551, "step": 309 }, { "epoch": 0.1204438940287997, "grad_norm": 0.93359375, "learning_rate": 0.0001997511175823147, "loss": 2.5188, "step": 310 }, { "epoch": 0.12083242271921517, "grad_norm": 0.88671875, "learning_rate": 0.00019974145730588684, "loss": 2.5237, "step": 311 }, { "epoch": 0.12122095140963066, "grad_norm": 0.94140625, "learning_rate": 0.00019973161334458386, "loss": 2.4621, "step": 312 }, { "epoch": 0.12160948010004614, "grad_norm": 0.9296875, "learning_rate": 0.00019972158571653444, "loss": 2.4367, "step": 313 }, { "epoch": 0.12199800879046162, "grad_norm": 0.890625, "learning_rate": 0.00019971137444020563, "loss": 2.3794, "step": 314 }, { "epoch": 0.1223865374808771, "grad_norm": 0.97265625, "learning_rate": 0.00019970097953440262, "loss": 2.402, "step": 315 }, { "epoch": 0.12277506617129258, "grad_norm": 1.03125, "learning_rate": 0.00019969040101826873, "loss": 2.4515, "step": 316 }, { "epoch": 0.12316359486170807, "grad_norm": 0.96484375, "learning_rate": 0.00019967963891128547, "loss": 2.4462, "step": 317 }, { "epoch": 0.12355212355212356, "grad_norm": 0.9453125, "learning_rate": 0.00019966869323327248, "loss": 2.4375, "step": 318 }, { "epoch": 0.12394065224253903, "grad_norm": 0.84375, "learning_rate": 0.00019965756400438739, "loss": 2.4505, "step": 319 }, { "epoch": 0.12432918093295452, "grad_norm": 1.1171875, "learning_rate": 0.00019964625124512596, "loss": 2.4783, "step": 320 }, { "epoch": 0.12471770962337, "grad_norm": 1.0234375, "learning_rate": 0.00019963475497632182, "loss": 2.4582, "step": 321 }, { "epoch": 0.12510623831378548, "grad_norm": 0.91796875, "learning_rate": 0.00019962307521914662, "loss": 2.5833, "step": 322 }, { "epoch": 0.12549476700420098, "grad_norm": 1.0703125, "learning_rate": 0.00019961121199510998, "loss": 2.4726, "step": 323 }, { "epoch": 0.12588329569461645, "grad_norm": 1.0234375, "learning_rate": 0.00019959916532605925, "loss": 2.5252, "step": 324 }, { "epoch": 0.12627182438503193, "grad_norm": 0.95703125, "learning_rate": 0.00019958693523417976, "loss": 2.512, "step": 325 }, { "epoch": 0.1266603530754474, "grad_norm": 0.9765625, "learning_rate": 0.00019957452174199452, "loss": 2.4786, "step": 326 }, { "epoch": 0.1270488817658629, "grad_norm": 0.9453125, "learning_rate": 0.0001995619248723644, "loss": 2.5086, "step": 327 }, { "epoch": 0.12743741045627838, "grad_norm": 0.91796875, "learning_rate": 0.00019954914464848786, "loss": 2.5146, "step": 328 }, { "epoch": 0.12782593914669385, "grad_norm": 1.1484375, "learning_rate": 0.00019953618109390115, "loss": 2.4517, "step": 329 }, { "epoch": 0.12821446783710935, "grad_norm": 0.94140625, "learning_rate": 0.00019952303423247805, "loss": 2.4473, "step": 330 }, { "epoch": 0.12860299652752483, "grad_norm": 1.1015625, "learning_rate": 0.00019950970408842995, "loss": 2.4946, "step": 331 }, { "epoch": 0.1289915252179403, "grad_norm": 0.9609375, "learning_rate": 0.00019949619068630576, "loss": 2.3974, "step": 332 }, { "epoch": 0.1293800539083558, "grad_norm": 1.0625, "learning_rate": 0.00019948249405099197, "loss": 2.5094, "step": 333 }, { "epoch": 0.12976858259877128, "grad_norm": 1.0078125, "learning_rate": 0.00019946861420771237, "loss": 2.4857, "step": 334 }, { "epoch": 0.13015711128918675, "grad_norm": 0.890625, "learning_rate": 0.00019945455118202822, "loss": 2.5822, "step": 335 }, { "epoch": 0.13054563997960225, "grad_norm": 0.9296875, "learning_rate": 0.0001994403049998382, "loss": 2.4962, "step": 336 }, { "epoch": 0.13093416867001773, "grad_norm": 0.91015625, "learning_rate": 0.00019942587568737817, "loss": 2.5256, "step": 337 }, { "epoch": 0.1313226973604332, "grad_norm": 0.97265625, "learning_rate": 0.0001994112632712213, "loss": 2.4939, "step": 338 }, { "epoch": 0.1317112260508487, "grad_norm": 0.859375, "learning_rate": 0.00019939646777827793, "loss": 2.4533, "step": 339 }, { "epoch": 0.13209975474126417, "grad_norm": 0.8515625, "learning_rate": 0.0001993814892357957, "loss": 2.476, "step": 340 }, { "epoch": 0.13248828343167965, "grad_norm": 0.83203125, "learning_rate": 0.0001993663276713591, "loss": 2.4901, "step": 341 }, { "epoch": 0.13287681212209515, "grad_norm": 0.85546875, "learning_rate": 0.00019935098311288995, "loss": 2.4806, "step": 342 }, { "epoch": 0.13326534081251062, "grad_norm": 0.8984375, "learning_rate": 0.00019933545558864686, "loss": 2.4668, "step": 343 }, { "epoch": 0.1336538695029261, "grad_norm": 0.84375, "learning_rate": 0.0001993197451272255, "loss": 2.5187, "step": 344 }, { "epoch": 0.1340423981933416, "grad_norm": 0.9140625, "learning_rate": 0.0001993038517575584, "loss": 2.5422, "step": 345 }, { "epoch": 0.13443092688375707, "grad_norm": 0.9375, "learning_rate": 0.000199287775508915, "loss": 2.472, "step": 346 }, { "epoch": 0.13481945557417255, "grad_norm": 0.9296875, "learning_rate": 0.00019927151641090145, "loss": 2.4643, "step": 347 }, { "epoch": 0.13520798426458805, "grad_norm": 0.92578125, "learning_rate": 0.00019925507449346065, "loss": 2.4486, "step": 348 }, { "epoch": 0.13559651295500352, "grad_norm": 0.94921875, "learning_rate": 0.00019923844978687224, "loss": 2.4472, "step": 349 }, { "epoch": 0.135985041645419, "grad_norm": 0.95703125, "learning_rate": 0.0001992216423217524, "loss": 2.3693, "step": 350 }, { "epoch": 0.1363735703358345, "grad_norm": 0.9921875, "learning_rate": 0.00019920465212905394, "loss": 2.4148, "step": 351 }, { "epoch": 0.13676209902624997, "grad_norm": 1.046875, "learning_rate": 0.00019918747924006623, "loss": 2.559, "step": 352 }, { "epoch": 0.13715062771666545, "grad_norm": 1.0546875, "learning_rate": 0.00019917012368641496, "loss": 2.433, "step": 353 }, { "epoch": 0.13753915640708095, "grad_norm": 1.234375, "learning_rate": 0.00019915258550006235, "loss": 2.4333, "step": 354 }, { "epoch": 0.13792768509749642, "grad_norm": 0.93359375, "learning_rate": 0.00019913486471330685, "loss": 2.4635, "step": 355 }, { "epoch": 0.1383162137879119, "grad_norm": 0.97265625, "learning_rate": 0.0001991169613587833, "loss": 2.4907, "step": 356 }, { "epoch": 0.1387047424783274, "grad_norm": 1.0, "learning_rate": 0.00019909887546946261, "loss": 2.4457, "step": 357 }, { "epoch": 0.13909327116874287, "grad_norm": 0.86328125, "learning_rate": 0.00019908060707865202, "loss": 2.4854, "step": 358 }, { "epoch": 0.13948179985915835, "grad_norm": 0.9140625, "learning_rate": 0.00019906215621999474, "loss": 2.4735, "step": 359 }, { "epoch": 0.13987032854957382, "grad_norm": 0.9140625, "learning_rate": 0.00019904352292747008, "loss": 2.537, "step": 360 }, { "epoch": 0.14025885723998932, "grad_norm": 0.88671875, "learning_rate": 0.00019902470723539328, "loss": 2.5154, "step": 361 }, { "epoch": 0.1406473859304048, "grad_norm": 0.96875, "learning_rate": 0.00019900570917841552, "loss": 2.4888, "step": 362 }, { "epoch": 0.14103591462082027, "grad_norm": 0.88671875, "learning_rate": 0.0001989865287915238, "loss": 2.4895, "step": 363 }, { "epoch": 0.14142444331123577, "grad_norm": 0.86328125, "learning_rate": 0.00019896716611004096, "loss": 2.464, "step": 364 }, { "epoch": 0.14181297200165124, "grad_norm": 0.8828125, "learning_rate": 0.00019894762116962544, "loss": 2.4514, "step": 365 }, { "epoch": 0.14220150069206672, "grad_norm": 1.03125, "learning_rate": 0.00019892789400627147, "loss": 2.455, "step": 366 }, { "epoch": 0.14259002938248222, "grad_norm": 0.8671875, "learning_rate": 0.0001989079846563087, "loss": 2.4248, "step": 367 }, { "epoch": 0.1429785580728977, "grad_norm": 0.953125, "learning_rate": 0.00019888789315640254, "loss": 2.5139, "step": 368 }, { "epoch": 0.14336708676331317, "grad_norm": 0.84375, "learning_rate": 0.00019886761954355354, "loss": 2.4619, "step": 369 }, { "epoch": 0.14375561545372867, "grad_norm": 0.90234375, "learning_rate": 0.00019884716385509785, "loss": 2.517, "step": 370 }, { "epoch": 0.14414414414414414, "grad_norm": 0.9375, "learning_rate": 0.0001988265261287069, "loss": 2.474, "step": 371 }, { "epoch": 0.14453267283455962, "grad_norm": 0.84375, "learning_rate": 0.00019880570640238733, "loss": 2.511, "step": 372 }, { "epoch": 0.14492120152497512, "grad_norm": 0.89453125, "learning_rate": 0.00019878470471448094, "loss": 2.4679, "step": 373 }, { "epoch": 0.1453097302153906, "grad_norm": 0.85546875, "learning_rate": 0.00019876352110366466, "loss": 2.4047, "step": 374 }, { "epoch": 0.14569825890580607, "grad_norm": 0.93359375, "learning_rate": 0.0001987421556089504, "loss": 2.5233, "step": 375 }, { "epoch": 0.14608678759622157, "grad_norm": 0.88671875, "learning_rate": 0.00019872060826968513, "loss": 2.4157, "step": 376 }, { "epoch": 0.14647531628663704, "grad_norm": 0.94140625, "learning_rate": 0.00019869887912555062, "loss": 2.4568, "step": 377 }, { "epoch": 0.14686384497705252, "grad_norm": 0.87890625, "learning_rate": 0.0001986769682165635, "loss": 2.4597, "step": 378 }, { "epoch": 0.14725237366746802, "grad_norm": 0.890625, "learning_rate": 0.0001986548755830751, "loss": 2.5093, "step": 379 }, { "epoch": 0.1476409023578835, "grad_norm": 0.89453125, "learning_rate": 0.00019863260126577146, "loss": 2.4912, "step": 380 }, { "epoch": 0.14802943104829897, "grad_norm": 0.875, "learning_rate": 0.00019861014530567314, "loss": 2.5111, "step": 381 }, { "epoch": 0.14841795973871447, "grad_norm": 0.83203125, "learning_rate": 0.0001985875077441353, "loss": 2.5189, "step": 382 }, { "epoch": 0.14880648842912994, "grad_norm": 0.85546875, "learning_rate": 0.00019856468862284752, "loss": 2.4845, "step": 383 }, { "epoch": 0.14919501711954541, "grad_norm": 0.88671875, "learning_rate": 0.0001985416879838337, "loss": 2.5138, "step": 384 }, { "epoch": 0.14958354580996092, "grad_norm": 0.9140625, "learning_rate": 0.00019851850586945203, "loss": 2.459, "step": 385 }, { "epoch": 0.1499720745003764, "grad_norm": 0.890625, "learning_rate": 0.00019849514232239495, "loss": 2.4799, "step": 386 }, { "epoch": 0.15036060319079186, "grad_norm": 0.921875, "learning_rate": 0.000198471597385689, "loss": 2.4408, "step": 387 }, { "epoch": 0.15074913188120737, "grad_norm": 0.8046875, "learning_rate": 0.00019844787110269478, "loss": 2.4064, "step": 388 }, { "epoch": 0.15113766057162284, "grad_norm": 0.95703125, "learning_rate": 0.00019842396351710685, "loss": 2.4311, "step": 389 }, { "epoch": 0.1515261892620383, "grad_norm": 0.83984375, "learning_rate": 0.00019839987467295362, "loss": 2.4982, "step": 390 }, { "epoch": 0.15191471795245381, "grad_norm": 0.83203125, "learning_rate": 0.00019837560461459744, "loss": 2.504, "step": 391 }, { "epoch": 0.1523032466428693, "grad_norm": 0.84765625, "learning_rate": 0.0001983511533867342, "loss": 2.5254, "step": 392 }, { "epoch": 0.15269177533328476, "grad_norm": 1.0078125, "learning_rate": 0.0001983265210343936, "loss": 2.4947, "step": 393 }, { "epoch": 0.15308030402370024, "grad_norm": 0.921875, "learning_rate": 0.00019830170760293877, "loss": 2.4509, "step": 394 }, { "epoch": 0.15346883271411574, "grad_norm": 0.90625, "learning_rate": 0.00019827671313806642, "loss": 2.4467, "step": 395 }, { "epoch": 0.1538573614045312, "grad_norm": 0.79296875, "learning_rate": 0.0001982515376858066, "loss": 2.4584, "step": 396 }, { "epoch": 0.15424589009494669, "grad_norm": 0.94140625, "learning_rate": 0.00019822618129252263, "loss": 2.4464, "step": 397 }, { "epoch": 0.1546344187853622, "grad_norm": 0.8515625, "learning_rate": 0.00019820064400491116, "loss": 2.471, "step": 398 }, { "epoch": 0.15502294747577766, "grad_norm": 0.90234375, "learning_rate": 0.0001981749258700019, "loss": 2.4903, "step": 399 }, { "epoch": 0.15541147616619314, "grad_norm": 0.85546875, "learning_rate": 0.00019814902693515764, "loss": 2.4695, "step": 400 }, { "epoch": 0.15580000485660864, "grad_norm": 0.83984375, "learning_rate": 0.00019812294724807407, "loss": 2.5074, "step": 401 }, { "epoch": 0.1561885335470241, "grad_norm": 0.89453125, "learning_rate": 0.00019809668685677988, "loss": 2.5358, "step": 402 }, { "epoch": 0.15657706223743958, "grad_norm": 0.87109375, "learning_rate": 0.0001980702458096364, "loss": 2.5308, "step": 403 }, { "epoch": 0.1569655909278551, "grad_norm": 0.84375, "learning_rate": 0.00019804362415533775, "loss": 2.4968, "step": 404 }, { "epoch": 0.15735411961827056, "grad_norm": 0.81640625, "learning_rate": 0.00019801682194291068, "loss": 2.478, "step": 405 }, { "epoch": 0.15774264830868603, "grad_norm": 0.9140625, "learning_rate": 0.00019798983922171437, "loss": 2.4332, "step": 406 }, { "epoch": 0.15813117699910154, "grad_norm": 0.99609375, "learning_rate": 0.00019796267604144045, "loss": 2.5514, "step": 407 }, { "epoch": 0.158519705689517, "grad_norm": 0.859375, "learning_rate": 0.00019793533245211298, "loss": 2.4069, "step": 408 }, { "epoch": 0.15890823437993248, "grad_norm": 0.8671875, "learning_rate": 0.00019790780850408816, "loss": 2.508, "step": 409 }, { "epoch": 0.15929676307034799, "grad_norm": 0.86328125, "learning_rate": 0.00019788010424805433, "loss": 2.4841, "step": 410 }, { "epoch": 0.15968529176076346, "grad_norm": 0.88671875, "learning_rate": 0.00019785221973503197, "loss": 2.4688, "step": 411 }, { "epoch": 0.16007382045117893, "grad_norm": 0.90625, "learning_rate": 0.00019782415501637347, "loss": 2.4655, "step": 412 }, { "epoch": 0.16046234914159443, "grad_norm": 0.93359375, "learning_rate": 0.0001977959101437631, "loss": 2.5057, "step": 413 }, { "epoch": 0.1608508778320099, "grad_norm": 0.890625, "learning_rate": 0.0001977674851692169, "loss": 2.4822, "step": 414 }, { "epoch": 0.16123940652242538, "grad_norm": 0.94140625, "learning_rate": 0.00019773888014508259, "loss": 2.4492, "step": 415 }, { "epoch": 0.16162793521284088, "grad_norm": 0.91796875, "learning_rate": 0.00019771009512403948, "loss": 2.4674, "step": 416 }, { "epoch": 0.16201646390325636, "grad_norm": 0.9453125, "learning_rate": 0.00019768113015909835, "loss": 2.489, "step": 417 }, { "epoch": 0.16240499259367183, "grad_norm": 0.859375, "learning_rate": 0.00019765198530360137, "loss": 2.4627, "step": 418 }, { "epoch": 0.16279352128408733, "grad_norm": 0.84765625, "learning_rate": 0.000197622660611222, "loss": 2.4688, "step": 419 }, { "epoch": 0.1631820499745028, "grad_norm": 0.859375, "learning_rate": 0.00019759315613596492, "loss": 2.4542, "step": 420 }, { "epoch": 0.16357057866491828, "grad_norm": 0.87890625, "learning_rate": 0.0001975634719321658, "loss": 2.4351, "step": 421 }, { "epoch": 0.16395910735533378, "grad_norm": 0.8828125, "learning_rate": 0.00019753360805449145, "loss": 2.4476, "step": 422 }, { "epoch": 0.16434763604574926, "grad_norm": 0.890625, "learning_rate": 0.0001975035645579394, "loss": 2.4848, "step": 423 }, { "epoch": 0.16473616473616473, "grad_norm": 0.94140625, "learning_rate": 0.00019747334149783813, "loss": 2.5401, "step": 424 }, { "epoch": 0.16512469342658023, "grad_norm": 0.8515625, "learning_rate": 0.0001974429389298467, "loss": 2.4539, "step": 425 }, { "epoch": 0.1655132221169957, "grad_norm": 0.8984375, "learning_rate": 0.0001974123569099548, "loss": 2.4487, "step": 426 }, { "epoch": 0.16590175080741118, "grad_norm": 0.83984375, "learning_rate": 0.0001973815954944826, "loss": 2.4444, "step": 427 }, { "epoch": 0.16629027949782665, "grad_norm": 1.0078125, "learning_rate": 0.00019735065474008062, "loss": 2.4784, "step": 428 }, { "epoch": 0.16667880818824216, "grad_norm": 0.91796875, "learning_rate": 0.00019731953470372964, "loss": 2.4598, "step": 429 }, { "epoch": 0.16706733687865763, "grad_norm": 0.97265625, "learning_rate": 0.00019728823544274063, "loss": 2.4388, "step": 430 }, { "epoch": 0.1674558655690731, "grad_norm": 0.95703125, "learning_rate": 0.00019725675701475468, "loss": 2.4653, "step": 431 }, { "epoch": 0.1678443942594886, "grad_norm": 0.875, "learning_rate": 0.00019722509947774276, "loss": 2.4829, "step": 432 }, { "epoch": 0.16823292294990408, "grad_norm": 0.875, "learning_rate": 0.00019719326289000568, "loss": 2.4322, "step": 433 }, { "epoch": 0.16862145164031955, "grad_norm": 1.0546875, "learning_rate": 0.00019716124731017405, "loss": 2.4855, "step": 434 }, { "epoch": 0.16900998033073505, "grad_norm": 0.8828125, "learning_rate": 0.00019712905279720806, "loss": 2.4812, "step": 435 }, { "epoch": 0.16939850902115053, "grad_norm": 0.859375, "learning_rate": 0.0001970966794103975, "loss": 2.4207, "step": 436 }, { "epoch": 0.169787037711566, "grad_norm": 0.90234375, "learning_rate": 0.00019706412720936145, "loss": 2.3899, "step": 437 }, { "epoch": 0.1701755664019815, "grad_norm": 0.890625, "learning_rate": 0.00019703139625404844, "loss": 2.4568, "step": 438 }, { "epoch": 0.17056409509239698, "grad_norm": 0.90234375, "learning_rate": 0.00019699848660473612, "loss": 2.495, "step": 439 }, { "epoch": 0.17095262378281245, "grad_norm": 0.85546875, "learning_rate": 0.0001969653983220312, "loss": 2.5276, "step": 440 }, { "epoch": 0.17134115247322795, "grad_norm": 0.87109375, "learning_rate": 0.0001969321314668694, "loss": 2.4646, "step": 441 }, { "epoch": 0.17172968116364343, "grad_norm": 0.8984375, "learning_rate": 0.0001968986861005153, "loss": 2.4508, "step": 442 }, { "epoch": 0.1721182098540589, "grad_norm": 0.87890625, "learning_rate": 0.00019686506228456225, "loss": 2.5221, "step": 443 }, { "epoch": 0.1725067385444744, "grad_norm": 0.859375, "learning_rate": 0.0001968312600809322, "loss": 2.4256, "step": 444 }, { "epoch": 0.17289526723488988, "grad_norm": 0.828125, "learning_rate": 0.0001967972795518756, "loss": 2.4289, "step": 445 }, { "epoch": 0.17328379592530535, "grad_norm": 0.91796875, "learning_rate": 0.00019676312075997139, "loss": 2.374, "step": 446 }, { "epoch": 0.17367232461572085, "grad_norm": 0.8515625, "learning_rate": 0.00019672878376812667, "loss": 2.4398, "step": 447 }, { "epoch": 0.17406085330613633, "grad_norm": 0.8828125, "learning_rate": 0.00019669426863957686, "loss": 2.3595, "step": 448 }, { "epoch": 0.1744493819965518, "grad_norm": 0.8515625, "learning_rate": 0.00019665957543788532, "loss": 2.4631, "step": 449 }, { "epoch": 0.1748379106869673, "grad_norm": 0.83984375, "learning_rate": 0.00019662470422694342, "loss": 2.4148, "step": 450 }, { "epoch": 0.17522643937738278, "grad_norm": 0.97265625, "learning_rate": 0.0001965896550709704, "loss": 2.4994, "step": 451 }, { "epoch": 0.17561496806779825, "grad_norm": 0.9375, "learning_rate": 0.00019655442803451301, "loss": 2.4471, "step": 452 }, { "epoch": 0.17600349675821375, "grad_norm": 0.95703125, "learning_rate": 0.00019651902318244582, "loss": 2.5041, "step": 453 }, { "epoch": 0.17639202544862922, "grad_norm": 0.8359375, "learning_rate": 0.0001964834405799707, "loss": 2.4156, "step": 454 }, { "epoch": 0.1767805541390447, "grad_norm": 0.84375, "learning_rate": 0.00019644768029261698, "loss": 2.5044, "step": 455 }, { "epoch": 0.1771690828294602, "grad_norm": 0.890625, "learning_rate": 0.00019641174238624115, "loss": 2.482, "step": 456 }, { "epoch": 0.17755761151987567, "grad_norm": 0.8515625, "learning_rate": 0.00019637562692702677, "loss": 2.4772, "step": 457 }, { "epoch": 0.17794614021029115, "grad_norm": 0.96484375, "learning_rate": 0.00019633933398148452, "loss": 2.3809, "step": 458 }, { "epoch": 0.17833466890070665, "grad_norm": 0.80078125, "learning_rate": 0.0001963028636164518, "loss": 2.4452, "step": 459 }, { "epoch": 0.17872319759112212, "grad_norm": 1.015625, "learning_rate": 0.0001962662158990928, "loss": 2.4653, "step": 460 }, { "epoch": 0.1791117262815376, "grad_norm": 0.921875, "learning_rate": 0.00019622939089689837, "loss": 2.4565, "step": 461 }, { "epoch": 0.17950025497195307, "grad_norm": 0.890625, "learning_rate": 0.00019619238867768577, "loss": 2.4595, "step": 462 }, { "epoch": 0.17988878366236857, "grad_norm": 0.9375, "learning_rate": 0.00019615520930959867, "loss": 2.4754, "step": 463 }, { "epoch": 0.18027731235278405, "grad_norm": 0.8359375, "learning_rate": 0.00019611785286110695, "loss": 2.4152, "step": 464 }, { "epoch": 0.18066584104319952, "grad_norm": 0.90234375, "learning_rate": 0.00019608031940100665, "loss": 2.4357, "step": 465 }, { "epoch": 0.18105436973361502, "grad_norm": 0.94921875, "learning_rate": 0.00019604260899841979, "loss": 2.5019, "step": 466 }, { "epoch": 0.1814428984240305, "grad_norm": 0.8984375, "learning_rate": 0.0001960047217227942, "loss": 2.5252, "step": 467 }, { "epoch": 0.18183142711444597, "grad_norm": 0.83203125, "learning_rate": 0.00019596665764390342, "loss": 2.3954, "step": 468 }, { "epoch": 0.18221995580486147, "grad_norm": 1.03125, "learning_rate": 0.0001959284168318467, "loss": 2.4657, "step": 469 }, { "epoch": 0.18260848449527695, "grad_norm": 0.9140625, "learning_rate": 0.00019588999935704876, "loss": 2.5376, "step": 470 }, { "epoch": 0.18299701318569242, "grad_norm": 0.88671875, "learning_rate": 0.0001958514052902595, "loss": 2.3757, "step": 471 }, { "epoch": 0.18338554187610792, "grad_norm": 0.9921875, "learning_rate": 0.00019581263470255417, "loss": 2.4966, "step": 472 }, { "epoch": 0.1837740705665234, "grad_norm": 0.8515625, "learning_rate": 0.0001957736876653331, "loss": 2.4341, "step": 473 }, { "epoch": 0.18416259925693887, "grad_norm": 0.84375, "learning_rate": 0.00019573456425032158, "loss": 2.3674, "step": 474 }, { "epoch": 0.18455112794735437, "grad_norm": 1.0234375, "learning_rate": 0.00019569526452956962, "loss": 2.4213, "step": 475 }, { "epoch": 0.18493965663776984, "grad_norm": 0.90625, "learning_rate": 0.00019565578857545202, "loss": 2.4737, "step": 476 }, { "epoch": 0.18532818532818532, "grad_norm": 0.8515625, "learning_rate": 0.0001956161364606681, "loss": 2.4017, "step": 477 }, { "epoch": 0.18571671401860082, "grad_norm": 0.90234375, "learning_rate": 0.00019557630825824156, "loss": 2.515, "step": 478 }, { "epoch": 0.1861052427090163, "grad_norm": 1.0703125, "learning_rate": 0.00019553630404152043, "loss": 2.4351, "step": 479 }, { "epoch": 0.18649377139943177, "grad_norm": 0.87890625, "learning_rate": 0.00019549612388417692, "loss": 2.3931, "step": 480 }, { "epoch": 0.18688230008984727, "grad_norm": 0.94921875, "learning_rate": 0.00019545576786020717, "loss": 2.4537, "step": 481 }, { "epoch": 0.18727082878026274, "grad_norm": 0.87890625, "learning_rate": 0.00019541523604393128, "loss": 2.514, "step": 482 }, { "epoch": 0.18765935747067822, "grad_norm": 0.91796875, "learning_rate": 0.00019537452850999302, "loss": 2.4735, "step": 483 }, { "epoch": 0.18804788616109372, "grad_norm": 0.875, "learning_rate": 0.0001953336453333598, "loss": 2.4783, "step": 484 }, { "epoch": 0.1884364148515092, "grad_norm": 1.03125, "learning_rate": 0.00019529258658932248, "loss": 2.4402, "step": 485 }, { "epoch": 0.18882494354192467, "grad_norm": 0.88671875, "learning_rate": 0.0001952513523534953, "loss": 2.446, "step": 486 }, { "epoch": 0.18921347223234017, "grad_norm": 0.87890625, "learning_rate": 0.0001952099427018156, "loss": 2.4653, "step": 487 }, { "epoch": 0.18960200092275564, "grad_norm": 0.9296875, "learning_rate": 0.00019516835771054382, "loss": 2.439, "step": 488 }, { "epoch": 0.18999052961317112, "grad_norm": 0.86328125, "learning_rate": 0.00019512659745626334, "loss": 2.5075, "step": 489 }, { "epoch": 0.19037905830358662, "grad_norm": 0.83203125, "learning_rate": 0.0001950846620158802, "loss": 2.4529, "step": 490 }, { "epoch": 0.1907675869940021, "grad_norm": 0.83203125, "learning_rate": 0.00019504255146662321, "loss": 2.3809, "step": 491 }, { "epoch": 0.19115611568441757, "grad_norm": 0.859375, "learning_rate": 0.0001950002658860435, "loss": 2.4723, "step": 492 }, { "epoch": 0.19154464437483307, "grad_norm": 0.890625, "learning_rate": 0.0001949578053520147, "loss": 2.4074, "step": 493 }, { "epoch": 0.19193317306524854, "grad_norm": 0.88671875, "learning_rate": 0.0001949151699427325, "loss": 2.4241, "step": 494 }, { "epoch": 0.19232170175566402, "grad_norm": 0.859375, "learning_rate": 0.00019487235973671471, "loss": 2.4672, "step": 495 }, { "epoch": 0.1927102304460795, "grad_norm": 0.87109375, "learning_rate": 0.0001948293748128011, "loss": 2.4237, "step": 496 }, { "epoch": 0.193098759136495, "grad_norm": 0.87890625, "learning_rate": 0.00019478621525015303, "loss": 2.442, "step": 497 }, { "epoch": 0.19348728782691046, "grad_norm": 0.94921875, "learning_rate": 0.00019474288112825367, "loss": 2.4969, "step": 498 }, { "epoch": 0.19387581651732594, "grad_norm": 0.86328125, "learning_rate": 0.00019469937252690754, "loss": 2.4462, "step": 499 }, { "epoch": 0.19426434520774144, "grad_norm": 0.94140625, "learning_rate": 0.00019465568952624053, "loss": 2.3824, "step": 500 }, { "epoch": 0.19465287389815691, "grad_norm": 0.984375, "learning_rate": 0.00019461183220669974, "loss": 2.5187, "step": 501 }, { "epoch": 0.1950414025885724, "grad_norm": 0.921875, "learning_rate": 0.00019456780064905317, "loss": 2.4442, "step": 502 }, { "epoch": 0.1954299312789879, "grad_norm": 1.0234375, "learning_rate": 0.00019452359493438983, "loss": 2.4223, "step": 503 }, { "epoch": 0.19581845996940336, "grad_norm": 0.83203125, "learning_rate": 0.0001944792151441194, "loss": 2.544, "step": 504 }, { "epoch": 0.19620698865981884, "grad_norm": 0.98046875, "learning_rate": 0.00019443466135997216, "loss": 2.4107, "step": 505 }, { "epoch": 0.19659551735023434, "grad_norm": 0.9921875, "learning_rate": 0.0001943899336639988, "loss": 2.4845, "step": 506 }, { "epoch": 0.1969840460406498, "grad_norm": 0.80078125, "learning_rate": 0.00019434503213857027, "loss": 2.4167, "step": 507 }, { "epoch": 0.1973725747310653, "grad_norm": 0.98828125, "learning_rate": 0.0001942999568663777, "loss": 2.4629, "step": 508 }, { "epoch": 0.1977611034214808, "grad_norm": 0.94140625, "learning_rate": 0.0001942547079304322, "loss": 2.4738, "step": 509 }, { "epoch": 0.19814963211189626, "grad_norm": 0.94921875, "learning_rate": 0.00019420928541406463, "loss": 2.4505, "step": 510 }, { "epoch": 0.19853816080231174, "grad_norm": 1.5390625, "learning_rate": 0.00019416368940092555, "loss": 2.4139, "step": 511 }, { "epoch": 0.19892668949272724, "grad_norm": 0.97265625, "learning_rate": 0.00019411791997498502, "loss": 2.4519, "step": 512 }, { "epoch": 0.1993152181831427, "grad_norm": 1.0546875, "learning_rate": 0.00019407197722053255, "loss": 2.4682, "step": 513 }, { "epoch": 0.19970374687355819, "grad_norm": 1.0546875, "learning_rate": 0.0001940258612221767, "loss": 2.4073, "step": 514 }, { "epoch": 0.2000922755639737, "grad_norm": 0.9609375, "learning_rate": 0.0001939795720648452, "loss": 2.4505, "step": 515 }, { "epoch": 0.20048080425438916, "grad_norm": 0.8984375, "learning_rate": 0.00019393310983378462, "loss": 2.4493, "step": 516 }, { "epoch": 0.20086933294480463, "grad_norm": 0.9921875, "learning_rate": 0.0001938864746145603, "loss": 2.4376, "step": 517 }, { "epoch": 0.20125786163522014, "grad_norm": 1.046875, "learning_rate": 0.00019383966649305607, "loss": 2.3207, "step": 518 }, { "epoch": 0.2016463903256356, "grad_norm": 0.84765625, "learning_rate": 0.00019379268555547432, "loss": 2.522, "step": 519 }, { "epoch": 0.20203491901605108, "grad_norm": 0.921875, "learning_rate": 0.00019374553188833554, "loss": 2.491, "step": 520 }, { "epoch": 0.20242344770646659, "grad_norm": 0.921875, "learning_rate": 0.00019369820557847844, "loss": 2.4827, "step": 521 }, { "epoch": 0.20281197639688206, "grad_norm": 0.796875, "learning_rate": 0.00019365070671305962, "loss": 2.4586, "step": 522 }, { "epoch": 0.20320050508729753, "grad_norm": 0.8515625, "learning_rate": 0.0001936030353795535, "loss": 2.4202, "step": 523 }, { "epoch": 0.20358903377771304, "grad_norm": 0.90625, "learning_rate": 0.00019355519166575205, "loss": 2.4167, "step": 524 }, { "epoch": 0.2039775624681285, "grad_norm": 0.875, "learning_rate": 0.00019350717565976474, "loss": 2.4965, "step": 525 }, { "epoch": 0.20436609115854398, "grad_norm": 0.81640625, "learning_rate": 0.00019345898745001838, "loss": 2.4731, "step": 526 }, { "epoch": 0.20475461984895948, "grad_norm": 0.87890625, "learning_rate": 0.0001934106271252568, "loss": 2.4869, "step": 527 }, { "epoch": 0.20514314853937496, "grad_norm": 0.83984375, "learning_rate": 0.00019336209477454087, "loss": 2.3862, "step": 528 }, { "epoch": 0.20553167722979043, "grad_norm": 0.8515625, "learning_rate": 0.0001933133904872483, "loss": 2.4865, "step": 529 }, { "epoch": 0.2059202059202059, "grad_norm": 3.5, "learning_rate": 0.0001932645143530734, "loss": 2.4302, "step": 530 }, { "epoch": 0.2063087346106214, "grad_norm": 0.98828125, "learning_rate": 0.00019321546646202688, "loss": 2.4552, "step": 531 }, { "epoch": 0.20669726330103688, "grad_norm": 0.83984375, "learning_rate": 0.0001931662469044359, "loss": 2.4428, "step": 532 }, { "epoch": 0.20708579199145236, "grad_norm": 0.80078125, "learning_rate": 0.00019311685577094368, "loss": 2.5089, "step": 533 }, { "epoch": 0.20747432068186786, "grad_norm": 0.80859375, "learning_rate": 0.0001930672931525094, "loss": 2.4275, "step": 534 }, { "epoch": 0.20786284937228333, "grad_norm": 0.7734375, "learning_rate": 0.0001930175591404081, "loss": 2.3744, "step": 535 }, { "epoch": 0.2082513780626988, "grad_norm": 0.875, "learning_rate": 0.00019296765382623049, "loss": 2.4922, "step": 536 }, { "epoch": 0.2086399067531143, "grad_norm": 0.8359375, "learning_rate": 0.0001929175773018826, "loss": 2.5342, "step": 537 }, { "epoch": 0.20902843544352978, "grad_norm": 1.0234375, "learning_rate": 0.00019286732965958592, "loss": 2.4245, "step": 538 }, { "epoch": 0.20941696413394525, "grad_norm": 0.828125, "learning_rate": 0.00019281691099187702, "loss": 2.4321, "step": 539 }, { "epoch": 0.20980549282436076, "grad_norm": 1.09375, "learning_rate": 0.0001927663213916074, "loss": 2.4336, "step": 540 }, { "epoch": 0.21019402151477623, "grad_norm": 1.0546875, "learning_rate": 0.0001927155609519434, "loss": 2.4085, "step": 541 }, { "epoch": 0.2105825502051917, "grad_norm": 1.109375, "learning_rate": 0.00019266462976636598, "loss": 2.4764, "step": 542 }, { "epoch": 0.2109710788956072, "grad_norm": 0.9296875, "learning_rate": 0.00019261352792867046, "loss": 2.4533, "step": 543 }, { "epoch": 0.21135960758602268, "grad_norm": 0.82421875, "learning_rate": 0.00019256225553296655, "loss": 2.4508, "step": 544 }, { "epoch": 0.21174813627643815, "grad_norm": 0.8671875, "learning_rate": 0.00019251081267367807, "loss": 2.3913, "step": 545 }, { "epoch": 0.21213666496685366, "grad_norm": 0.87890625, "learning_rate": 0.00019245919944554257, "loss": 2.3765, "step": 546 }, { "epoch": 0.21252519365726913, "grad_norm": 0.953125, "learning_rate": 0.00019240741594361155, "loss": 2.3952, "step": 547 }, { "epoch": 0.2129137223476846, "grad_norm": 0.8984375, "learning_rate": 0.0001923554622632501, "loss": 2.4084, "step": 548 }, { "epoch": 0.2133022510381001, "grad_norm": 0.953125, "learning_rate": 0.00019230333850013654, "loss": 2.3587, "step": 549 }, { "epoch": 0.21369077972851558, "grad_norm": 1.0625, "learning_rate": 0.00019225104475026258, "loss": 2.3831, "step": 550 }, { "epoch": 0.21407930841893105, "grad_norm": 0.85546875, "learning_rate": 0.00019219858110993288, "loss": 2.3703, "step": 551 }, { "epoch": 0.21446783710934655, "grad_norm": 1.0078125, "learning_rate": 0.00019214594767576508, "loss": 2.4301, "step": 552 }, { "epoch": 0.21485636579976203, "grad_norm": 1.15625, "learning_rate": 0.00019209314454468937, "loss": 2.4783, "step": 553 }, { "epoch": 0.2152448944901775, "grad_norm": 0.8515625, "learning_rate": 0.00019204017181394855, "loss": 2.4635, "step": 554 }, { "epoch": 0.215633423180593, "grad_norm": 1.0625, "learning_rate": 0.00019198702958109776, "loss": 2.3172, "step": 555 }, { "epoch": 0.21602195187100848, "grad_norm": 0.9609375, "learning_rate": 0.00019193371794400425, "loss": 2.4102, "step": 556 }, { "epoch": 0.21641048056142395, "grad_norm": 0.8828125, "learning_rate": 0.00019188023700084726, "loss": 2.4023, "step": 557 }, { "epoch": 0.21679900925183945, "grad_norm": 0.890625, "learning_rate": 0.00019182658685011785, "loss": 2.5074, "step": 558 }, { "epoch": 0.21718753794225493, "grad_norm": 0.8671875, "learning_rate": 0.0001917727675906187, "loss": 2.4028, "step": 559 }, { "epoch": 0.2175760666326704, "grad_norm": 0.82421875, "learning_rate": 0.00019171877932146387, "loss": 2.4564, "step": 560 }, { "epoch": 0.2179645953230859, "grad_norm": 0.81640625, "learning_rate": 0.00019166462214207869, "loss": 2.4189, "step": 561 }, { "epoch": 0.21835312401350138, "grad_norm": 0.875, "learning_rate": 0.00019161029615219962, "loss": 2.4836, "step": 562 }, { "epoch": 0.21874165270391685, "grad_norm": 0.84765625, "learning_rate": 0.0001915558014518739, "loss": 2.4663, "step": 563 }, { "epoch": 0.21913018139433232, "grad_norm": 0.8515625, "learning_rate": 0.00019150113814145957, "loss": 2.3867, "step": 564 }, { "epoch": 0.21951871008474783, "grad_norm": 0.8671875, "learning_rate": 0.00019144630632162508, "loss": 2.4438, "step": 565 }, { "epoch": 0.2199072387751633, "grad_norm": 0.875, "learning_rate": 0.0001913913060933493, "loss": 2.4158, "step": 566 }, { "epoch": 0.22029576746557877, "grad_norm": 0.90625, "learning_rate": 0.0001913361375579212, "loss": 2.4088, "step": 567 }, { "epoch": 0.22068429615599428, "grad_norm": 0.97265625, "learning_rate": 0.0001912808008169397, "loss": 2.4266, "step": 568 }, { "epoch": 0.22107282484640975, "grad_norm": 0.82421875, "learning_rate": 0.00019122529597231352, "loss": 2.4021, "step": 569 }, { "epoch": 0.22146135353682522, "grad_norm": 1.015625, "learning_rate": 0.00019116962312626092, "loss": 2.4939, "step": 570 }, { "epoch": 0.22184988222724072, "grad_norm": 1.0234375, "learning_rate": 0.00019111378238130956, "loss": 2.4856, "step": 571 }, { "epoch": 0.2222384109176562, "grad_norm": 5.90625, "learning_rate": 0.0001910577738402964, "loss": 2.5271, "step": 572 }, { "epoch": 0.22262693960807167, "grad_norm": 1.0, "learning_rate": 0.00019100159760636727, "loss": 2.4331, "step": 573 }, { "epoch": 0.22301546829848717, "grad_norm": 0.953125, "learning_rate": 0.00019094525378297687, "loss": 2.4777, "step": 574 }, { "epoch": 0.22340399698890265, "grad_norm": 0.828125, "learning_rate": 0.00019088874247388864, "loss": 2.5293, "step": 575 }, { "epoch": 0.22379252567931812, "grad_norm": 0.8515625, "learning_rate": 0.0001908320637831743, "loss": 2.4537, "step": 576 }, { "epoch": 0.22418105436973362, "grad_norm": 1.0390625, "learning_rate": 0.00019077521781521388, "loss": 2.4579, "step": 577 }, { "epoch": 0.2245695830601491, "grad_norm": 0.9140625, "learning_rate": 0.00019071820467469558, "loss": 2.4129, "step": 578 }, { "epoch": 0.22495811175056457, "grad_norm": 0.9140625, "learning_rate": 0.0001906610244666153, "loss": 2.3975, "step": 579 }, { "epoch": 0.22534664044098007, "grad_norm": 0.8671875, "learning_rate": 0.00019060367729627673, "loss": 2.4488, "step": 580 }, { "epoch": 0.22573516913139555, "grad_norm": 0.89453125, "learning_rate": 0.000190546163269291, "loss": 2.5263, "step": 581 }, { "epoch": 0.22612369782181102, "grad_norm": 0.8359375, "learning_rate": 0.00019048848249157647, "loss": 2.4725, "step": 582 }, { "epoch": 0.22651222651222652, "grad_norm": 0.8359375, "learning_rate": 0.0001904306350693587, "loss": 2.4523, "step": 583 }, { "epoch": 0.226900755202642, "grad_norm": 0.79296875, "learning_rate": 0.00019037262110917008, "loss": 2.5423, "step": 584 }, { "epoch": 0.22728928389305747, "grad_norm": 0.8203125, "learning_rate": 0.00019031444071784966, "loss": 2.4369, "step": 585 }, { "epoch": 0.22767781258347297, "grad_norm": 0.875, "learning_rate": 0.00019025609400254308, "loss": 2.4778, "step": 586 }, { "epoch": 0.22806634127388845, "grad_norm": 0.8203125, "learning_rate": 0.00019019758107070224, "loss": 2.3663, "step": 587 }, { "epoch": 0.22845486996430392, "grad_norm": 0.83203125, "learning_rate": 0.00019013890203008514, "loss": 2.3942, "step": 588 }, { "epoch": 0.22884339865471942, "grad_norm": 1.0078125, "learning_rate": 0.00019008005698875567, "loss": 2.4601, "step": 589 }, { "epoch": 0.2292319273451349, "grad_norm": 0.85546875, "learning_rate": 0.00019002104605508343, "loss": 2.4136, "step": 590 }, { "epoch": 0.22962045603555037, "grad_norm": 0.8671875, "learning_rate": 0.00018996186933774366, "loss": 2.4091, "step": 591 }, { "epoch": 0.23000898472596587, "grad_norm": 0.8671875, "learning_rate": 0.00018990252694571667, "loss": 2.3836, "step": 592 }, { "epoch": 0.23039751341638134, "grad_norm": 0.828125, "learning_rate": 0.00018984301898828813, "loss": 2.4327, "step": 593 }, { "epoch": 0.23078604210679682, "grad_norm": 0.87109375, "learning_rate": 0.00018978334557504843, "loss": 2.4264, "step": 594 }, { "epoch": 0.23117457079721232, "grad_norm": 0.8515625, "learning_rate": 0.0001897235068158927, "loss": 2.4683, "step": 595 }, { "epoch": 0.2315630994876278, "grad_norm": 0.8359375, "learning_rate": 0.00018966350282102065, "loss": 2.4395, "step": 596 }, { "epoch": 0.23195162817804327, "grad_norm": 0.91796875, "learning_rate": 0.00018960333370093626, "loss": 2.4799, "step": 597 }, { "epoch": 0.23234015686845874, "grad_norm": 1.125, "learning_rate": 0.00018954299956644755, "loss": 2.4338, "step": 598 }, { "epoch": 0.23272868555887424, "grad_norm": 0.90234375, "learning_rate": 0.00018948250052866646, "loss": 2.4708, "step": 599 }, { "epoch": 0.23311721424928972, "grad_norm": 0.80078125, "learning_rate": 0.0001894218366990087, "loss": 2.4036, "step": 600 }, { "epoch": 0.2335057429397052, "grad_norm": 0.94140625, "learning_rate": 0.00018936100818919328, "loss": 2.4747, "step": 601 }, { "epoch": 0.2338942716301207, "grad_norm": 1.078125, "learning_rate": 0.0001893000151112427, "loss": 2.5058, "step": 602 }, { "epoch": 0.23428280032053617, "grad_norm": 1.2265625, "learning_rate": 0.00018923885757748238, "loss": 2.4404, "step": 603 }, { "epoch": 0.23467132901095164, "grad_norm": 0.94140625, "learning_rate": 0.00018917753570054066, "loss": 2.4196, "step": 604 }, { "epoch": 0.23505985770136714, "grad_norm": 0.8359375, "learning_rate": 0.00018911604959334858, "loss": 2.4353, "step": 605 }, { "epoch": 0.23544838639178262, "grad_norm": 0.8203125, "learning_rate": 0.00018905439936913948, "loss": 2.476, "step": 606 }, { "epoch": 0.2358369150821981, "grad_norm": 1.0078125, "learning_rate": 0.00018899258514144907, "loss": 2.4251, "step": 607 }, { "epoch": 0.2362254437726136, "grad_norm": 0.8359375, "learning_rate": 0.00018893060702411508, "loss": 2.4725, "step": 608 }, { "epoch": 0.23661397246302907, "grad_norm": 0.86328125, "learning_rate": 0.000188868465131277, "loss": 2.4095, "step": 609 }, { "epoch": 0.23700250115344454, "grad_norm": 0.80859375, "learning_rate": 0.00018880615957737602, "loss": 2.4845, "step": 610 }, { "epoch": 0.23739102984386004, "grad_norm": 0.85546875, "learning_rate": 0.00018874369047715465, "loss": 2.3689, "step": 611 }, { "epoch": 0.23777955853427551, "grad_norm": 0.77734375, "learning_rate": 0.0001886810579456566, "loss": 2.4247, "step": 612 }, { "epoch": 0.238168087224691, "grad_norm": 0.79296875, "learning_rate": 0.00018861826209822663, "loss": 2.3775, "step": 613 }, { "epoch": 0.2385566159151065, "grad_norm": 0.90234375, "learning_rate": 0.00018855530305051011, "loss": 2.4572, "step": 614 }, { "epoch": 0.23894514460552196, "grad_norm": 0.85546875, "learning_rate": 0.00018849218091845314, "loss": 2.4004, "step": 615 }, { "epoch": 0.23933367329593744, "grad_norm": 0.8125, "learning_rate": 0.00018842889581830206, "loss": 2.4783, "step": 616 }, { "epoch": 0.23972220198635294, "grad_norm": 0.96875, "learning_rate": 0.0001883654478666033, "loss": 2.3635, "step": 617 }, { "epoch": 0.2401107306767684, "grad_norm": 0.91796875, "learning_rate": 0.00018830183718020332, "loss": 2.3983, "step": 618 }, { "epoch": 0.2404992593671839, "grad_norm": 0.8671875, "learning_rate": 0.00018823806387624817, "loss": 2.4074, "step": 619 }, { "epoch": 0.2408877880575994, "grad_norm": 1.1640625, "learning_rate": 0.00018817412807218338, "loss": 2.5352, "step": 620 }, { "epoch": 0.24127631674801486, "grad_norm": 0.875, "learning_rate": 0.00018811002988575382, "loss": 2.4445, "step": 621 }, { "epoch": 0.24166484543843034, "grad_norm": 0.87109375, "learning_rate": 0.00018804576943500333, "loss": 2.4991, "step": 622 }, { "epoch": 0.24205337412884584, "grad_norm": 0.984375, "learning_rate": 0.00018798134683827464, "loss": 2.4199, "step": 623 }, { "epoch": 0.2424419028192613, "grad_norm": 0.76953125, "learning_rate": 0.000187916762214209, "loss": 2.3854, "step": 624 }, { "epoch": 0.2428304315096768, "grad_norm": 1.0, "learning_rate": 0.00018785201568174617, "loss": 2.4612, "step": 625 }, { "epoch": 0.2432189602000923, "grad_norm": 0.9296875, "learning_rate": 0.00018778710736012396, "loss": 2.4219, "step": 626 }, { "epoch": 0.24360748889050776, "grad_norm": 0.828125, "learning_rate": 0.0001877220373688782, "loss": 2.4123, "step": 627 }, { "epoch": 0.24399601758092324, "grad_norm": 0.83984375, "learning_rate": 0.0001876568058278425, "loss": 2.4361, "step": 628 }, { "epoch": 0.24438454627133874, "grad_norm": 0.828125, "learning_rate": 0.00018759141285714783, "loss": 2.4355, "step": 629 }, { "epoch": 0.2447730749617542, "grad_norm": 0.8515625, "learning_rate": 0.00018752585857722266, "loss": 2.4675, "step": 630 }, { "epoch": 0.24516160365216969, "grad_norm": 0.85546875, "learning_rate": 0.00018746014310879228, "loss": 2.4521, "step": 631 }, { "epoch": 0.24555013234258516, "grad_norm": 0.82421875, "learning_rate": 0.00018739426657287907, "loss": 2.435, "step": 632 }, { "epoch": 0.24593866103300066, "grad_norm": 0.8359375, "learning_rate": 0.0001873282290908019, "loss": 2.4109, "step": 633 }, { "epoch": 0.24632718972341613, "grad_norm": 0.79296875, "learning_rate": 0.00018726203078417604, "loss": 2.4263, "step": 634 }, { "epoch": 0.2467157184138316, "grad_norm": 0.80078125, "learning_rate": 0.00018719567177491296, "loss": 2.3932, "step": 635 }, { "epoch": 0.2471042471042471, "grad_norm": 0.80078125, "learning_rate": 0.0001871291521852201, "loss": 2.3958, "step": 636 }, { "epoch": 0.24749277579466258, "grad_norm": 0.9765625, "learning_rate": 0.0001870624721376006, "loss": 2.4574, "step": 637 }, { "epoch": 0.24788130448507806, "grad_norm": 0.83984375, "learning_rate": 0.0001869956317548531, "loss": 2.4139, "step": 638 }, { "epoch": 0.24826983317549356, "grad_norm": 0.8984375, "learning_rate": 0.00018692863116007152, "loss": 2.4308, "step": 639 }, { "epoch": 0.24865836186590903, "grad_norm": 0.78125, "learning_rate": 0.0001868614704766449, "loss": 2.3843, "step": 640 }, { "epoch": 0.2490468905563245, "grad_norm": 0.84765625, "learning_rate": 0.00018679414982825693, "loss": 2.4272, "step": 641 }, { "epoch": 0.24943541924674, "grad_norm": 0.86328125, "learning_rate": 0.00018672666933888603, "loss": 2.4451, "step": 642 }, { "epoch": 0.24982394793715548, "grad_norm": 0.92578125, "learning_rate": 0.00018665902913280496, "loss": 2.4515, "step": 643 }, { "epoch": 0.25021247662757096, "grad_norm": 0.85546875, "learning_rate": 0.00018659122933458065, "loss": 2.399, "step": 644 }, { "epoch": 0.25060100531798646, "grad_norm": 0.796875, "learning_rate": 0.00018652327006907386, "loss": 2.5024, "step": 645 }, { "epoch": 0.25098953400840196, "grad_norm": 0.83984375, "learning_rate": 0.00018645515146143904, "loss": 2.3793, "step": 646 }, { "epoch": 0.2513780626988174, "grad_norm": 0.80859375, "learning_rate": 0.0001863868736371241, "loss": 2.4433, "step": 647 }, { "epoch": 0.2517665913892329, "grad_norm": 0.88671875, "learning_rate": 0.00018631843672187022, "loss": 2.4498, "step": 648 }, { "epoch": 0.25215512007964835, "grad_norm": 0.859375, "learning_rate": 0.00018624984084171148, "loss": 2.4227, "step": 649 }, { "epoch": 0.25254364877006386, "grad_norm": 0.83203125, "learning_rate": 0.00018618108612297474, "loss": 2.5026, "step": 650 }, { "epoch": 0.25293217746047936, "grad_norm": 0.86328125, "learning_rate": 0.0001861121726922794, "loss": 2.4045, "step": 651 }, { "epoch": 0.2533207061508948, "grad_norm": 0.8671875, "learning_rate": 0.00018604310067653712, "loss": 2.445, "step": 652 }, { "epoch": 0.2537092348413103, "grad_norm": 0.8203125, "learning_rate": 0.0001859738702029516, "loss": 2.3895, "step": 653 }, { "epoch": 0.2540977635317258, "grad_norm": 0.88671875, "learning_rate": 0.00018590448139901847, "loss": 2.4448, "step": 654 }, { "epoch": 0.25448629222214125, "grad_norm": 0.94140625, "learning_rate": 0.00018583493439252476, "loss": 2.4411, "step": 655 }, { "epoch": 0.25487482091255675, "grad_norm": 0.83984375, "learning_rate": 0.00018576522931154895, "loss": 2.4646, "step": 656 }, { "epoch": 0.25526334960297226, "grad_norm": 0.8359375, "learning_rate": 0.00018569536628446065, "loss": 2.4226, "step": 657 }, { "epoch": 0.2556518782933877, "grad_norm": 0.875, "learning_rate": 0.0001856253454399203, "loss": 2.4317, "step": 658 }, { "epoch": 0.2560404069838032, "grad_norm": 0.93359375, "learning_rate": 0.000185555166906879, "loss": 2.4337, "step": 659 }, { "epoch": 0.2564289356742187, "grad_norm": 0.83203125, "learning_rate": 0.00018548483081457827, "loss": 2.4088, "step": 660 }, { "epoch": 0.25681746436463415, "grad_norm": 0.890625, "learning_rate": 0.00018541433729254972, "loss": 2.4964, "step": 661 }, { "epoch": 0.25720599305504965, "grad_norm": 0.93359375, "learning_rate": 0.00018534368647061495, "loss": 2.4715, "step": 662 }, { "epoch": 0.25759452174546515, "grad_norm": 0.8671875, "learning_rate": 0.00018527287847888523, "loss": 2.4574, "step": 663 }, { "epoch": 0.2579830504358806, "grad_norm": 0.80078125, "learning_rate": 0.0001852019134477613, "loss": 2.4681, "step": 664 }, { "epoch": 0.2583715791262961, "grad_norm": 0.9609375, "learning_rate": 0.00018513079150793305, "loss": 2.4757, "step": 665 }, { "epoch": 0.2587601078167116, "grad_norm": 0.83984375, "learning_rate": 0.00018505951279037934, "loss": 2.4098, "step": 666 }, { "epoch": 0.25914863650712705, "grad_norm": 0.76171875, "learning_rate": 0.0001849880774263678, "loss": 2.353, "step": 667 }, { "epoch": 0.25953716519754255, "grad_norm": 0.890625, "learning_rate": 0.00018491648554745457, "loss": 2.4913, "step": 668 }, { "epoch": 0.25992569388795805, "grad_norm": 0.796875, "learning_rate": 0.0001848447372854839, "loss": 2.4221, "step": 669 }, { "epoch": 0.2603142225783735, "grad_norm": 0.81640625, "learning_rate": 0.00018477283277258812, "loss": 2.3949, "step": 670 }, { "epoch": 0.260702751268789, "grad_norm": 0.80859375, "learning_rate": 0.00018470077214118732, "loss": 2.4428, "step": 671 }, { "epoch": 0.2610912799592045, "grad_norm": 0.75390625, "learning_rate": 0.0001846285555239891, "loss": 2.4141, "step": 672 }, { "epoch": 0.26147980864961995, "grad_norm": 0.90625, "learning_rate": 0.00018455618305398836, "loss": 2.3777, "step": 673 }, { "epoch": 0.26186833734003545, "grad_norm": 0.84375, "learning_rate": 0.00018448365486446687, "loss": 2.412, "step": 674 }, { "epoch": 0.26225686603045095, "grad_norm": 0.828125, "learning_rate": 0.00018441097108899332, "loss": 2.4766, "step": 675 }, { "epoch": 0.2626453947208664, "grad_norm": 0.8515625, "learning_rate": 0.0001843381318614229, "loss": 2.4765, "step": 676 }, { "epoch": 0.2630339234112819, "grad_norm": 0.9140625, "learning_rate": 0.0001842651373158971, "loss": 2.4458, "step": 677 }, { "epoch": 0.2634224521016974, "grad_norm": 0.83984375, "learning_rate": 0.0001841919875868433, "loss": 2.3816, "step": 678 }, { "epoch": 0.26381098079211285, "grad_norm": 0.8984375, "learning_rate": 0.00018411868280897488, "loss": 2.4251, "step": 679 }, { "epoch": 0.26419950948252835, "grad_norm": 0.83984375, "learning_rate": 0.00018404522311729062, "loss": 2.4098, "step": 680 }, { "epoch": 0.26458803817294385, "grad_norm": 0.828125, "learning_rate": 0.00018397160864707467, "loss": 2.4377, "step": 681 }, { "epoch": 0.2649765668633593, "grad_norm": 0.91796875, "learning_rate": 0.00018389783953389613, "loss": 2.4096, "step": 682 }, { "epoch": 0.2653650955537748, "grad_norm": 0.81640625, "learning_rate": 0.00018382391591360896, "loss": 2.4866, "step": 683 }, { "epoch": 0.2657536242441903, "grad_norm": 0.8203125, "learning_rate": 0.00018374983792235163, "loss": 2.4486, "step": 684 }, { "epoch": 0.26614215293460575, "grad_norm": 0.78515625, "learning_rate": 0.00018367560569654693, "loss": 2.4155, "step": 685 }, { "epoch": 0.26653068162502125, "grad_norm": 0.7734375, "learning_rate": 0.0001836012193729017, "loss": 2.3801, "step": 686 }, { "epoch": 0.26691921031543675, "grad_norm": 0.77734375, "learning_rate": 0.00018352667908840654, "loss": 2.3832, "step": 687 }, { "epoch": 0.2673077390058522, "grad_norm": 0.80859375, "learning_rate": 0.00018345198498033558, "loss": 2.4371, "step": 688 }, { "epoch": 0.2676962676962677, "grad_norm": 0.796875, "learning_rate": 0.00018337713718624623, "loss": 2.5273, "step": 689 }, { "epoch": 0.2680847963866832, "grad_norm": 0.8046875, "learning_rate": 0.00018330213584397896, "loss": 2.4359, "step": 690 }, { "epoch": 0.26847332507709865, "grad_norm": 0.76953125, "learning_rate": 0.000183226981091657, "loss": 2.5058, "step": 691 }, { "epoch": 0.26886185376751415, "grad_norm": 0.80078125, "learning_rate": 0.0001831516730676861, "loss": 2.4414, "step": 692 }, { "epoch": 0.26925038245792965, "grad_norm": 0.84765625, "learning_rate": 0.00018307621191075425, "loss": 2.4316, "step": 693 }, { "epoch": 0.2696389111483451, "grad_norm": 0.8046875, "learning_rate": 0.00018300059775983152, "loss": 2.4051, "step": 694 }, { "epoch": 0.2700274398387606, "grad_norm": 0.828125, "learning_rate": 0.0001829248307541697, "loss": 2.4377, "step": 695 }, { "epoch": 0.2704159685291761, "grad_norm": 0.78125, "learning_rate": 0.000182848911033302, "loss": 2.3461, "step": 696 }, { "epoch": 0.27080449721959154, "grad_norm": 0.88671875, "learning_rate": 0.000182772838737043, "loss": 2.4788, "step": 697 }, { "epoch": 0.27119302591000705, "grad_norm": 0.78515625, "learning_rate": 0.00018269661400548823, "loss": 2.4301, "step": 698 }, { "epoch": 0.27158155460042255, "grad_norm": 0.8984375, "learning_rate": 0.0001826202369790139, "loss": 2.2877, "step": 699 }, { "epoch": 0.271970083290838, "grad_norm": 0.91796875, "learning_rate": 0.00018254370779827668, "loss": 2.4765, "step": 700 }, { "epoch": 0.2723586119812535, "grad_norm": 0.79296875, "learning_rate": 0.00018246702660421356, "loss": 2.371, "step": 701 }, { "epoch": 0.272747140671669, "grad_norm": 0.86328125, "learning_rate": 0.00018239019353804133, "loss": 2.4542, "step": 702 }, { "epoch": 0.27313566936208444, "grad_norm": 1.2265625, "learning_rate": 0.00018231320874125654, "loss": 2.4434, "step": 703 }, { "epoch": 0.27352419805249994, "grad_norm": 1.890625, "learning_rate": 0.00018223607235563524, "loss": 2.4492, "step": 704 }, { "epoch": 0.27391272674291545, "grad_norm": 0.91015625, "learning_rate": 0.0001821587845232325, "loss": 2.4152, "step": 705 }, { "epoch": 0.2743012554333309, "grad_norm": 1.0625, "learning_rate": 0.00018208134538638245, "loss": 2.4712, "step": 706 }, { "epoch": 0.2746897841237464, "grad_norm": 0.8046875, "learning_rate": 0.00018200375508769772, "loss": 2.4033, "step": 707 }, { "epoch": 0.2750783128141619, "grad_norm": 0.84765625, "learning_rate": 0.0001819260137700694, "loss": 2.4302, "step": 708 }, { "epoch": 0.27546684150457734, "grad_norm": 0.93359375, "learning_rate": 0.00018184812157666667, "loss": 2.3352, "step": 709 }, { "epoch": 0.27585537019499284, "grad_norm": 0.8984375, "learning_rate": 0.00018177007865093664, "loss": 2.4609, "step": 710 }, { "epoch": 0.27624389888540835, "grad_norm": 0.765625, "learning_rate": 0.0001816918851366039, "loss": 2.4525, "step": 711 }, { "epoch": 0.2766324275758238, "grad_norm": 0.9296875, "learning_rate": 0.00018161354117767045, "loss": 2.4259, "step": 712 }, { "epoch": 0.2770209562662393, "grad_norm": 0.8515625, "learning_rate": 0.00018153504691841528, "loss": 2.4029, "step": 713 }, { "epoch": 0.2774094849566548, "grad_norm": 0.80078125, "learning_rate": 0.00018145640250339425, "loss": 2.3984, "step": 714 }, { "epoch": 0.27779801364707024, "grad_norm": 0.83984375, "learning_rate": 0.00018137760807743965, "loss": 2.4384, "step": 715 }, { "epoch": 0.27818654233748574, "grad_norm": 0.8671875, "learning_rate": 0.00018129866378566013, "loss": 2.427, "step": 716 }, { "epoch": 0.27857507102790124, "grad_norm": 0.87109375, "learning_rate": 0.00018121956977344033, "loss": 2.4698, "step": 717 }, { "epoch": 0.2789635997183167, "grad_norm": 0.81640625, "learning_rate": 0.00018114032618644053, "loss": 2.3839, "step": 718 }, { "epoch": 0.2793521284087322, "grad_norm": 0.8203125, "learning_rate": 0.0001810609331705965, "loss": 2.4457, "step": 719 }, { "epoch": 0.27974065709914764, "grad_norm": 1.0, "learning_rate": 0.00018098139087211927, "loss": 2.4193, "step": 720 }, { "epoch": 0.28012918578956314, "grad_norm": 1.09375, "learning_rate": 0.00018090169943749476, "loss": 2.4061, "step": 721 }, { "epoch": 0.28051771447997864, "grad_norm": 0.91796875, "learning_rate": 0.00018082185901348346, "loss": 2.3813, "step": 722 }, { "epoch": 0.2809062431703941, "grad_norm": 0.94140625, "learning_rate": 0.00018074186974712032, "loss": 2.4133, "step": 723 }, { "epoch": 0.2812947718608096, "grad_norm": 0.94921875, "learning_rate": 0.0001806617317857144, "loss": 2.4118, "step": 724 }, { "epoch": 0.2816833005512251, "grad_norm": 0.80859375, "learning_rate": 0.00018058144527684854, "loss": 2.4778, "step": 725 }, { "epoch": 0.28207182924164054, "grad_norm": 1.09375, "learning_rate": 0.00018050101036837926, "loss": 2.4505, "step": 726 }, { "epoch": 0.28246035793205604, "grad_norm": 1.078125, "learning_rate": 0.00018042042720843623, "loss": 2.3984, "step": 727 }, { "epoch": 0.28284888662247154, "grad_norm": 0.8828125, "learning_rate": 0.0001803396959454222, "loss": 2.4154, "step": 728 }, { "epoch": 0.283237415312887, "grad_norm": 1.078125, "learning_rate": 0.00018025881672801273, "loss": 2.4028, "step": 729 }, { "epoch": 0.2836259440033025, "grad_norm": 0.9140625, "learning_rate": 0.0001801777897051558, "loss": 2.4469, "step": 730 }, { "epoch": 0.284014472693718, "grad_norm": 0.9609375, "learning_rate": 0.00018009661502607158, "loss": 2.2968, "step": 731 }, { "epoch": 0.28440300138413344, "grad_norm": 1.03125, "learning_rate": 0.0001800152928402522, "loss": 2.4668, "step": 732 }, { "epoch": 0.28479153007454894, "grad_norm": 1.0546875, "learning_rate": 0.00017993382329746133, "loss": 2.4243, "step": 733 }, { "epoch": 0.28518005876496444, "grad_norm": 0.7265625, "learning_rate": 0.0001798522065477342, "loss": 2.3054, "step": 734 }, { "epoch": 0.2855685874553799, "grad_norm": 0.96875, "learning_rate": 0.00017977044274137703, "loss": 2.3318, "step": 735 }, { "epoch": 0.2859571161457954, "grad_norm": 0.96875, "learning_rate": 0.00017968853202896684, "loss": 2.3392, "step": 736 }, { "epoch": 0.2863456448362109, "grad_norm": 0.796875, "learning_rate": 0.00017960647456135125, "loss": 2.3525, "step": 737 }, { "epoch": 0.28673417352662633, "grad_norm": 0.92578125, "learning_rate": 0.0001795242704896481, "loss": 2.4782, "step": 738 }, { "epoch": 0.28712270221704184, "grad_norm": 0.8828125, "learning_rate": 0.00017944191996524525, "loss": 2.4221, "step": 739 }, { "epoch": 0.28751123090745734, "grad_norm": 0.86328125, "learning_rate": 0.00017935942313980022, "loss": 2.4797, "step": 740 }, { "epoch": 0.2878997595978728, "grad_norm": 0.79296875, "learning_rate": 0.00017927678016524005, "loss": 2.4934, "step": 741 }, { "epoch": 0.2882882882882883, "grad_norm": 0.8984375, "learning_rate": 0.00017919399119376087, "loss": 2.4664, "step": 742 }, { "epoch": 0.2886768169787038, "grad_norm": 1.140625, "learning_rate": 0.00017911105637782767, "loss": 2.4354, "step": 743 }, { "epoch": 0.28906534566911923, "grad_norm": 0.8125, "learning_rate": 0.00017902797587017406, "loss": 2.4726, "step": 744 }, { "epoch": 0.28945387435953474, "grad_norm": 1.0, "learning_rate": 0.00017894474982380193, "loss": 2.4309, "step": 745 }, { "epoch": 0.28984240304995024, "grad_norm": 0.95703125, "learning_rate": 0.0001788613783919812, "loss": 2.4065, "step": 746 }, { "epoch": 0.2902309317403657, "grad_norm": 0.74609375, "learning_rate": 0.00017877786172824952, "loss": 2.399, "step": 747 }, { "epoch": 0.2906194604307812, "grad_norm": 0.82421875, "learning_rate": 0.0001786941999864121, "loss": 2.4409, "step": 748 }, { "epoch": 0.2910079891211967, "grad_norm": 0.87890625, "learning_rate": 0.0001786103933205412, "loss": 2.3703, "step": 749 }, { "epoch": 0.29139651781161213, "grad_norm": 0.97265625, "learning_rate": 0.000178526441884976, "loss": 2.389, "step": 750 }, { "epoch": 0.29178504650202763, "grad_norm": 0.8984375, "learning_rate": 0.00017844234583432232, "loss": 2.4237, "step": 751 }, { "epoch": 0.29217357519244314, "grad_norm": 0.83984375, "learning_rate": 0.00017835810532345231, "loss": 2.4474, "step": 752 }, { "epoch": 0.2925621038828586, "grad_norm": 0.8671875, "learning_rate": 0.00017827372050750414, "loss": 2.3757, "step": 753 }, { "epoch": 0.2929506325732741, "grad_norm": 0.83203125, "learning_rate": 0.00017818919154188179, "loss": 2.4017, "step": 754 }, { "epoch": 0.2933391612636896, "grad_norm": 0.87109375, "learning_rate": 0.00017810451858225459, "loss": 2.4833, "step": 755 }, { "epoch": 0.29372768995410503, "grad_norm": 0.83203125, "learning_rate": 0.0001780197017845571, "loss": 2.4328, "step": 756 }, { "epoch": 0.29411621864452053, "grad_norm": 0.78515625, "learning_rate": 0.0001779347413049889, "loss": 2.3754, "step": 757 }, { "epoch": 0.29450474733493603, "grad_norm": 0.77734375, "learning_rate": 0.00017784963730001395, "loss": 2.4475, "step": 758 }, { "epoch": 0.2948932760253515, "grad_norm": 0.7890625, "learning_rate": 0.00017776438992636072, "loss": 2.4114, "step": 759 }, { "epoch": 0.295281804715767, "grad_norm": 0.78515625, "learning_rate": 0.00017767899934102154, "loss": 2.4167, "step": 760 }, { "epoch": 0.2956703334061825, "grad_norm": 0.8046875, "learning_rate": 0.0001775934657012527, "loss": 2.4213, "step": 761 }, { "epoch": 0.29605886209659793, "grad_norm": 0.87109375, "learning_rate": 0.00017750778916457373, "loss": 2.3988, "step": 762 }, { "epoch": 0.29644739078701343, "grad_norm": 0.77734375, "learning_rate": 0.00017742196988876748, "loss": 2.416, "step": 763 }, { "epoch": 0.29683591947742893, "grad_norm": 0.859375, "learning_rate": 0.00017733600803187952, "loss": 2.4384, "step": 764 }, { "epoch": 0.2972244481678444, "grad_norm": 0.83984375, "learning_rate": 0.0001772499037522181, "loss": 2.3419, "step": 765 }, { "epoch": 0.2976129768582599, "grad_norm": 0.7421875, "learning_rate": 0.0001771636572083537, "loss": 2.4816, "step": 766 }, { "epoch": 0.2980015055486754, "grad_norm": 0.87890625, "learning_rate": 0.00017707726855911888, "loss": 2.4267, "step": 767 }, { "epoch": 0.29839003423909083, "grad_norm": 0.80859375, "learning_rate": 0.0001769907379636078, "loss": 2.3589, "step": 768 }, { "epoch": 0.29877856292950633, "grad_norm": 0.8359375, "learning_rate": 0.00017690406558117615, "loss": 2.4548, "step": 769 }, { "epoch": 0.29916709161992183, "grad_norm": 0.83984375, "learning_rate": 0.0001768172515714406, "loss": 2.4251, "step": 770 }, { "epoch": 0.2995556203103373, "grad_norm": 0.828125, "learning_rate": 0.0001767302960942787, "loss": 2.3883, "step": 771 }, { "epoch": 0.2999441490007528, "grad_norm": 0.87109375, "learning_rate": 0.00017664319930982863, "loss": 2.4494, "step": 772 }, { "epoch": 0.3003326776911683, "grad_norm": 0.8046875, "learning_rate": 0.00017655596137848857, "loss": 2.4177, "step": 773 }, { "epoch": 0.3007212063815837, "grad_norm": 0.8515625, "learning_rate": 0.00017646858246091692, "loss": 2.393, "step": 774 }, { "epoch": 0.30110973507199923, "grad_norm": 0.90625, "learning_rate": 0.00017638106271803143, "loss": 2.5158, "step": 775 }, { "epoch": 0.30149826376241473, "grad_norm": 0.8359375, "learning_rate": 0.00017629340231100943, "loss": 2.4587, "step": 776 }, { "epoch": 0.3018867924528302, "grad_norm": 1.0859375, "learning_rate": 0.00017620560140128722, "loss": 2.4661, "step": 777 }, { "epoch": 0.3022753211432457, "grad_norm": 0.8671875, "learning_rate": 0.0001761176601505598, "loss": 2.4737, "step": 778 }, { "epoch": 0.3026638498336612, "grad_norm": 0.9296875, "learning_rate": 0.0001760295787207807, "loss": 2.4462, "step": 779 }, { "epoch": 0.3030523785240766, "grad_norm": 0.765625, "learning_rate": 0.00017594135727416155, "loss": 2.4242, "step": 780 }, { "epoch": 0.30344090721449213, "grad_norm": 1.0234375, "learning_rate": 0.00017585299597317185, "loss": 2.4995, "step": 781 }, { "epoch": 0.30382943590490763, "grad_norm": 0.8515625, "learning_rate": 0.00017576449498053867, "loss": 2.4151, "step": 782 }, { "epoch": 0.3042179645953231, "grad_norm": 0.8515625, "learning_rate": 0.00017567585445924633, "loss": 2.3866, "step": 783 }, { "epoch": 0.3046064932857386, "grad_norm": 0.90625, "learning_rate": 0.00017558707457253613, "loss": 2.4381, "step": 784 }, { "epoch": 0.3049950219761541, "grad_norm": 0.83203125, "learning_rate": 0.000175498155483906, "loss": 2.4386, "step": 785 }, { "epoch": 0.3053835506665695, "grad_norm": 0.8203125, "learning_rate": 0.0001754090973571102, "loss": 2.4855, "step": 786 }, { "epoch": 0.305772079356985, "grad_norm": 0.80859375, "learning_rate": 0.00017531990035615915, "loss": 2.3814, "step": 787 }, { "epoch": 0.3061606080474005, "grad_norm": 0.8046875, "learning_rate": 0.00017523056464531885, "loss": 2.4152, "step": 788 }, { "epoch": 0.306549136737816, "grad_norm": 0.87890625, "learning_rate": 0.0001751410903891109, "loss": 2.4139, "step": 789 }, { "epoch": 0.3069376654282315, "grad_norm": 0.83203125, "learning_rate": 0.000175051477752312, "loss": 2.4099, "step": 790 }, { "epoch": 0.3073261941186469, "grad_norm": 0.84765625, "learning_rate": 0.00017496172689995368, "loss": 2.4541, "step": 791 }, { "epoch": 0.3077147228090624, "grad_norm": 0.84375, "learning_rate": 0.00017487183799732203, "loss": 2.5013, "step": 792 }, { "epoch": 0.3081032514994779, "grad_norm": 0.796875, "learning_rate": 0.0001747818112099573, "loss": 2.413, "step": 793 }, { "epoch": 0.30849178018989337, "grad_norm": 0.80859375, "learning_rate": 0.0001746916467036538, "loss": 2.4217, "step": 794 }, { "epoch": 0.3088803088803089, "grad_norm": 0.87109375, "learning_rate": 0.00017460134464445935, "loss": 2.4751, "step": 795 }, { "epoch": 0.3092688375707244, "grad_norm": 0.875, "learning_rate": 0.00017451090519867517, "loss": 2.4224, "step": 796 }, { "epoch": 0.3096573662611398, "grad_norm": 0.81640625, "learning_rate": 0.00017442032853285543, "loss": 2.3641, "step": 797 }, { "epoch": 0.3100458949515553, "grad_norm": 0.94921875, "learning_rate": 0.00017432961481380707, "loss": 2.5269, "step": 798 }, { "epoch": 0.3104344236419708, "grad_norm": 0.76953125, "learning_rate": 0.00017423876420858932, "loss": 2.3866, "step": 799 }, { "epoch": 0.31082295233238627, "grad_norm": 0.7890625, "learning_rate": 0.00017414777688451368, "loss": 2.4445, "step": 800 }, { "epoch": 0.3112114810228018, "grad_norm": 0.91015625, "learning_rate": 0.0001740566530091432, "loss": 2.4237, "step": 801 }, { "epoch": 0.3116000097132173, "grad_norm": 0.87109375, "learning_rate": 0.00017396539275029262, "loss": 2.4056, "step": 802 }, { "epoch": 0.3119885384036327, "grad_norm": 0.91796875, "learning_rate": 0.00017387399627602772, "loss": 2.4517, "step": 803 }, { "epoch": 0.3123770670940482, "grad_norm": 0.9375, "learning_rate": 0.00017378246375466513, "loss": 2.4313, "step": 804 }, { "epoch": 0.3127655957844637, "grad_norm": 0.8203125, "learning_rate": 0.00017369079535477217, "loss": 2.4828, "step": 805 }, { "epoch": 0.31315412447487917, "grad_norm": 0.89453125, "learning_rate": 0.00017359899124516623, "loss": 2.4158, "step": 806 }, { "epoch": 0.31354265316529467, "grad_norm": 0.921875, "learning_rate": 0.00017350705159491464, "loss": 2.3548, "step": 807 }, { "epoch": 0.3139311818557102, "grad_norm": 0.83984375, "learning_rate": 0.00017341497657333448, "loss": 2.427, "step": 808 }, { "epoch": 0.3143197105461256, "grad_norm": 0.76171875, "learning_rate": 0.000173322766349992, "loss": 2.4104, "step": 809 }, { "epoch": 0.3147082392365411, "grad_norm": 0.859375, "learning_rate": 0.00017323042109470245, "loss": 2.4454, "step": 810 }, { "epoch": 0.3150967679269566, "grad_norm": 0.91796875, "learning_rate": 0.0001731379409775298, "loss": 2.4989, "step": 811 }, { "epoch": 0.31548529661737207, "grad_norm": 0.87890625, "learning_rate": 0.0001730453261687864, "loss": 2.4115, "step": 812 }, { "epoch": 0.31587382530778757, "grad_norm": 0.84375, "learning_rate": 0.00017295257683903257, "loss": 2.3898, "step": 813 }, { "epoch": 0.31626235399820307, "grad_norm": 0.96875, "learning_rate": 0.00017285969315907648, "loss": 2.3612, "step": 814 }, { "epoch": 0.3166508826886185, "grad_norm": 0.86328125, "learning_rate": 0.00017276667529997355, "loss": 2.4465, "step": 815 }, { "epoch": 0.317039411379034, "grad_norm": 0.80859375, "learning_rate": 0.0001726735234330265, "loss": 2.4731, "step": 816 }, { "epoch": 0.3174279400694495, "grad_norm": 0.82421875, "learning_rate": 0.0001725802377297847, "loss": 2.4403, "step": 817 }, { "epoch": 0.31781646875986497, "grad_norm": 0.89453125, "learning_rate": 0.0001724868183620441, "loss": 2.3637, "step": 818 }, { "epoch": 0.31820499745028047, "grad_norm": 0.78515625, "learning_rate": 0.00017239326550184668, "loss": 2.3778, "step": 819 }, { "epoch": 0.31859352614069597, "grad_norm": 0.8046875, "learning_rate": 0.00017229957932148035, "loss": 2.451, "step": 820 }, { "epoch": 0.3189820548311114, "grad_norm": 0.79296875, "learning_rate": 0.00017220575999347856, "loss": 2.3804, "step": 821 }, { "epoch": 0.3193705835215269, "grad_norm": 0.84765625, "learning_rate": 0.0001721118076906199, "loss": 2.3504, "step": 822 }, { "epoch": 0.3197591122119424, "grad_norm": 0.796875, "learning_rate": 0.0001720177225859279, "loss": 2.4297, "step": 823 }, { "epoch": 0.32014764090235787, "grad_norm": 0.78515625, "learning_rate": 0.00017192350485267064, "loss": 2.3792, "step": 824 }, { "epoch": 0.32053616959277337, "grad_norm": 0.82421875, "learning_rate": 0.00017182915466436045, "loss": 2.3826, "step": 825 }, { "epoch": 0.32092469828318887, "grad_norm": 0.7890625, "learning_rate": 0.00017173467219475352, "loss": 2.3815, "step": 826 }, { "epoch": 0.3213132269736043, "grad_norm": 0.82421875, "learning_rate": 0.00017164005761784984, "loss": 2.4616, "step": 827 }, { "epoch": 0.3217017556640198, "grad_norm": 0.84765625, "learning_rate": 0.00017154531110789248, "loss": 2.3989, "step": 828 }, { "epoch": 0.3220902843544353, "grad_norm": 0.86328125, "learning_rate": 0.0001714504328393676, "loss": 2.4294, "step": 829 }, { "epoch": 0.32247881304485077, "grad_norm": 0.89453125, "learning_rate": 0.00017135542298700397, "loss": 2.4438, "step": 830 }, { "epoch": 0.32286734173526627, "grad_norm": 0.76953125, "learning_rate": 0.00017126028172577274, "loss": 2.4648, "step": 831 }, { "epoch": 0.32325587042568177, "grad_norm": 0.8984375, "learning_rate": 0.00017116500923088697, "loss": 2.5006, "step": 832 }, { "epoch": 0.3236443991160972, "grad_norm": 0.84765625, "learning_rate": 0.0001710696056778014, "loss": 2.4287, "step": 833 }, { "epoch": 0.3240329278065127, "grad_norm": 0.83984375, "learning_rate": 0.0001709740712422123, "loss": 2.3856, "step": 834 }, { "epoch": 0.3244214564969282, "grad_norm": 0.80078125, "learning_rate": 0.00017087840610005675, "loss": 2.4137, "step": 835 }, { "epoch": 0.32480998518734366, "grad_norm": 0.921875, "learning_rate": 0.0001707826104275127, "loss": 2.5085, "step": 836 }, { "epoch": 0.32519851387775917, "grad_norm": 0.85546875, "learning_rate": 0.0001706866844009984, "loss": 2.3359, "step": 837 }, { "epoch": 0.32558704256817467, "grad_norm": 0.703125, "learning_rate": 0.00017059062819717218, "loss": 2.3356, "step": 838 }, { "epoch": 0.3259755712585901, "grad_norm": 0.8671875, "learning_rate": 0.00017049444199293215, "loss": 2.4243, "step": 839 }, { "epoch": 0.3263640999490056, "grad_norm": 0.91015625, "learning_rate": 0.00017039812596541574, "loss": 2.4393, "step": 840 }, { "epoch": 0.3267526286394211, "grad_norm": 0.82421875, "learning_rate": 0.00017030168029199958, "loss": 2.3736, "step": 841 }, { "epoch": 0.32714115732983656, "grad_norm": 0.875, "learning_rate": 0.00017020510515029894, "loss": 2.4372, "step": 842 }, { "epoch": 0.32752968602025206, "grad_norm": 0.88671875, "learning_rate": 0.00017010840071816764, "loss": 2.4257, "step": 843 }, { "epoch": 0.32791821471066757, "grad_norm": 0.85546875, "learning_rate": 0.0001700115671736975, "loss": 2.3996, "step": 844 }, { "epoch": 0.328306743401083, "grad_norm": 0.85546875, "learning_rate": 0.00016991460469521817, "loss": 2.3186, "step": 845 }, { "epoch": 0.3286952720914985, "grad_norm": 1.15625, "learning_rate": 0.00016981751346129668, "loss": 2.4109, "step": 846 }, { "epoch": 0.329083800781914, "grad_norm": 0.76953125, "learning_rate": 0.0001697202936507373, "loss": 2.4756, "step": 847 }, { "epoch": 0.32947232947232946, "grad_norm": 0.9375, "learning_rate": 0.00016962294544258096, "loss": 2.4485, "step": 848 }, { "epoch": 0.32986085816274496, "grad_norm": 0.91796875, "learning_rate": 0.00016952546901610513, "loss": 2.3982, "step": 849 }, { "epoch": 0.33024938685316046, "grad_norm": 0.76953125, "learning_rate": 0.0001694278645508234, "loss": 2.4209, "step": 850 }, { "epoch": 0.3306379155435759, "grad_norm": 0.82421875, "learning_rate": 0.00016933013222648508, "loss": 2.4256, "step": 851 }, { "epoch": 0.3310264442339914, "grad_norm": 0.8359375, "learning_rate": 0.00016923227222307506, "loss": 2.4372, "step": 852 }, { "epoch": 0.3314149729244069, "grad_norm": 0.8046875, "learning_rate": 0.00016913428472081326, "loss": 2.366, "step": 853 }, { "epoch": 0.33180350161482236, "grad_norm": 0.8125, "learning_rate": 0.00016903616990015453, "loss": 2.3984, "step": 854 }, { "epoch": 0.33219203030523786, "grad_norm": 0.92578125, "learning_rate": 0.00016893792794178805, "loss": 2.3797, "step": 855 }, { "epoch": 0.3325805589956533, "grad_norm": 0.82421875, "learning_rate": 0.0001688395590266372, "loss": 2.4639, "step": 856 }, { "epoch": 0.3329690876860688, "grad_norm": 0.828125, "learning_rate": 0.0001687410633358592, "loss": 2.378, "step": 857 }, { "epoch": 0.3333576163764843, "grad_norm": 0.87890625, "learning_rate": 0.00016864244105084473, "loss": 2.4141, "step": 858 }, { "epoch": 0.33374614506689976, "grad_norm": 0.80078125, "learning_rate": 0.00016854369235321754, "loss": 2.4147, "step": 859 }, { "epoch": 0.33413467375731526, "grad_norm": 0.78125, "learning_rate": 0.00016844481742483424, "loss": 2.3797, "step": 860 }, { "epoch": 0.33452320244773076, "grad_norm": 0.8359375, "learning_rate": 0.0001683458164477839, "loss": 2.3711, "step": 861 }, { "epoch": 0.3349117311381462, "grad_norm": 0.85546875, "learning_rate": 0.00016824668960438771, "loss": 2.4823, "step": 862 }, { "epoch": 0.3353002598285617, "grad_norm": 0.875, "learning_rate": 0.00016814743707719868, "loss": 2.464, "step": 863 }, { "epoch": 0.3356887885189772, "grad_norm": 0.88671875, "learning_rate": 0.00016804805904900127, "loss": 2.4662, "step": 864 }, { "epoch": 0.33607731720939266, "grad_norm": 1.0078125, "learning_rate": 0.00016794855570281106, "loss": 2.3954, "step": 865 }, { "epoch": 0.33646584589980816, "grad_norm": 0.9140625, "learning_rate": 0.00016784892722187438, "loss": 2.4792, "step": 866 }, { "epoch": 0.33685437459022366, "grad_norm": 0.87890625, "learning_rate": 0.0001677491737896681, "loss": 2.3987, "step": 867 }, { "epoch": 0.3372429032806391, "grad_norm": 0.78125, "learning_rate": 0.00016764929558989907, "loss": 2.3545, "step": 868 }, { "epoch": 0.3376314319710546, "grad_norm": 0.91015625, "learning_rate": 0.0001675492928065041, "loss": 2.3862, "step": 869 }, { "epoch": 0.3380199606614701, "grad_norm": 0.84375, "learning_rate": 0.00016744916562364928, "loss": 2.4111, "step": 870 }, { "epoch": 0.33840848935188556, "grad_norm": 0.8828125, "learning_rate": 0.0001673489142257298, "loss": 2.4104, "step": 871 }, { "epoch": 0.33879701804230106, "grad_norm": 0.8046875, "learning_rate": 0.0001672485387973697, "loss": 2.3908, "step": 872 }, { "epoch": 0.33918554673271656, "grad_norm": 0.8203125, "learning_rate": 0.00016714803952342134, "loss": 2.3626, "step": 873 }, { "epoch": 0.339574075423132, "grad_norm": 0.84765625, "learning_rate": 0.00016704741658896526, "loss": 2.4738, "step": 874 }, { "epoch": 0.3399626041135475, "grad_norm": 0.80859375, "learning_rate": 0.0001669466701793096, "loss": 2.515, "step": 875 }, { "epoch": 0.340351132803963, "grad_norm": 0.78125, "learning_rate": 0.00016684580047999, "loss": 2.4536, "step": 876 }, { "epoch": 0.34073966149437845, "grad_norm": 0.765625, "learning_rate": 0.0001667448076767691, "loss": 2.4085, "step": 877 }, { "epoch": 0.34112819018479396, "grad_norm": 0.83203125, "learning_rate": 0.00016664369195563633, "loss": 2.4197, "step": 878 }, { "epoch": 0.34151671887520946, "grad_norm": 0.7734375, "learning_rate": 0.00016654245350280737, "loss": 2.3588, "step": 879 }, { "epoch": 0.3419052475656249, "grad_norm": 0.86328125, "learning_rate": 0.00016644109250472396, "loss": 2.4536, "step": 880 }, { "epoch": 0.3422937762560404, "grad_norm": 0.796875, "learning_rate": 0.00016633960914805358, "loss": 2.4565, "step": 881 }, { "epoch": 0.3426823049464559, "grad_norm": 0.7421875, "learning_rate": 0.00016623800361968894, "loss": 2.4165, "step": 882 }, { "epoch": 0.34307083363687135, "grad_norm": 0.74609375, "learning_rate": 0.0001661362761067479, "loss": 2.3367, "step": 883 }, { "epoch": 0.34345936232728685, "grad_norm": 0.7890625, "learning_rate": 0.0001660344267965728, "loss": 2.3624, "step": 884 }, { "epoch": 0.34384789101770236, "grad_norm": 0.75, "learning_rate": 0.0001659324558767304, "loss": 2.4241, "step": 885 }, { "epoch": 0.3442364197081178, "grad_norm": 0.80859375, "learning_rate": 0.00016583036353501134, "loss": 2.4773, "step": 886 }, { "epoch": 0.3446249483985333, "grad_norm": 0.7578125, "learning_rate": 0.00016572814995942993, "loss": 2.4632, "step": 887 }, { "epoch": 0.3450134770889488, "grad_norm": 0.7578125, "learning_rate": 0.00016562581533822375, "loss": 2.3947, "step": 888 }, { "epoch": 0.34540200577936425, "grad_norm": 0.796875, "learning_rate": 0.0001655233598598532, "loss": 2.4371, "step": 889 }, { "epoch": 0.34579053446977975, "grad_norm": 0.734375, "learning_rate": 0.0001654207837130014, "loss": 2.377, "step": 890 }, { "epoch": 0.34617906316019526, "grad_norm": 0.765625, "learning_rate": 0.0001653180870865736, "loss": 2.3894, "step": 891 }, { "epoch": 0.3465675918506107, "grad_norm": 0.83984375, "learning_rate": 0.00016521527016969689, "loss": 2.427, "step": 892 }, { "epoch": 0.3469561205410262, "grad_norm": 0.78125, "learning_rate": 0.00016511233315172, "loss": 2.4122, "step": 893 }, { "epoch": 0.3473446492314417, "grad_norm": 0.78515625, "learning_rate": 0.00016500927622221275, "loss": 2.4176, "step": 894 }, { "epoch": 0.34773317792185715, "grad_norm": 0.765625, "learning_rate": 0.00016490609957096588, "loss": 2.4046, "step": 895 }, { "epoch": 0.34812170661227265, "grad_norm": 0.84375, "learning_rate": 0.0001648028033879905, "loss": 2.3702, "step": 896 }, { "epoch": 0.34851023530268815, "grad_norm": 0.82421875, "learning_rate": 0.00016469938786351786, "loss": 2.4124, "step": 897 }, { "epoch": 0.3488987639931036, "grad_norm": 0.7734375, "learning_rate": 0.00016459585318799914, "loss": 2.3884, "step": 898 }, { "epoch": 0.3492872926835191, "grad_norm": 0.765625, "learning_rate": 0.00016449219955210476, "loss": 2.38, "step": 899 }, { "epoch": 0.3496758213739346, "grad_norm": 0.86328125, "learning_rate": 0.00016438842714672436, "loss": 2.4462, "step": 900 }, { "epoch": 0.35006435006435005, "grad_norm": 0.78125, "learning_rate": 0.0001642845361629662, "loss": 2.39, "step": 901 }, { "epoch": 0.35045287875476555, "grad_norm": 0.77734375, "learning_rate": 0.000164180526792157, "loss": 2.4532, "step": 902 }, { "epoch": 0.35084140744518105, "grad_norm": 0.9140625, "learning_rate": 0.0001640763992258415, "loss": 2.4183, "step": 903 }, { "epoch": 0.3512299361355965, "grad_norm": 0.828125, "learning_rate": 0.00016397215365578197, "loss": 2.4412, "step": 904 }, { "epoch": 0.351618464826012, "grad_norm": 0.83984375, "learning_rate": 0.00016386779027395822, "loss": 2.4002, "step": 905 }, { "epoch": 0.3520069935164275, "grad_norm": 0.83984375, "learning_rate": 0.00016376330927256682, "loss": 2.4019, "step": 906 }, { "epoch": 0.35239552220684295, "grad_norm": 0.765625, "learning_rate": 0.00016365871084402108, "loss": 2.4112, "step": 907 }, { "epoch": 0.35278405089725845, "grad_norm": 0.80078125, "learning_rate": 0.00016355399518095052, "loss": 2.3778, "step": 908 }, { "epoch": 0.35317257958767395, "grad_norm": 0.8203125, "learning_rate": 0.0001634491624762006, "loss": 2.4154, "step": 909 }, { "epoch": 0.3535611082780894, "grad_norm": 0.8828125, "learning_rate": 0.0001633442129228322, "loss": 2.4527, "step": 910 }, { "epoch": 0.3539496369685049, "grad_norm": 0.72265625, "learning_rate": 0.00016323914671412154, "loss": 2.3572, "step": 911 }, { "epoch": 0.3543381656589204, "grad_norm": 0.93359375, "learning_rate": 0.0001631339640435596, "loss": 2.4506, "step": 912 }, { "epoch": 0.35472669434933585, "grad_norm": 0.79296875, "learning_rate": 0.00016302866510485182, "loss": 2.3681, "step": 913 }, { "epoch": 0.35511522303975135, "grad_norm": 0.7890625, "learning_rate": 0.00016292325009191784, "loss": 2.4713, "step": 914 }, { "epoch": 0.35550375173016685, "grad_norm": 0.8125, "learning_rate": 0.00016281771919889098, "loss": 2.4329, "step": 915 }, { "epoch": 0.3558922804205823, "grad_norm": 0.93359375, "learning_rate": 0.000162712072620118, "loss": 2.4268, "step": 916 }, { "epoch": 0.3562808091109978, "grad_norm": 0.828125, "learning_rate": 0.0001626063105501587, "loss": 2.4348, "step": 917 }, { "epoch": 0.3566693378014133, "grad_norm": 0.74609375, "learning_rate": 0.00016250043318378563, "loss": 2.3795, "step": 918 }, { "epoch": 0.35705786649182875, "grad_norm": 0.7890625, "learning_rate": 0.00016239444071598354, "loss": 2.4631, "step": 919 }, { "epoch": 0.35744639518224425, "grad_norm": 0.76171875, "learning_rate": 0.00016228833334194927, "loss": 2.3645, "step": 920 }, { "epoch": 0.35783492387265975, "grad_norm": 0.734375, "learning_rate": 0.00016218211125709124, "loss": 2.4475, "step": 921 }, { "epoch": 0.3582234525630752, "grad_norm": 0.76171875, "learning_rate": 0.00016207577465702908, "loss": 2.4651, "step": 922 }, { "epoch": 0.3586119812534907, "grad_norm": 0.74609375, "learning_rate": 0.00016196932373759338, "loss": 2.416, "step": 923 }, { "epoch": 0.35900050994390614, "grad_norm": 0.8515625, "learning_rate": 0.0001618627586948252, "loss": 2.3977, "step": 924 }, { "epoch": 0.35938903863432164, "grad_norm": 0.76171875, "learning_rate": 0.00016175607972497585, "loss": 2.3937, "step": 925 }, { "epoch": 0.35977756732473715, "grad_norm": 0.73046875, "learning_rate": 0.00016164928702450633, "loss": 2.3936, "step": 926 }, { "epoch": 0.3601660960151526, "grad_norm": 0.74609375, "learning_rate": 0.00016154238079008719, "loss": 2.3655, "step": 927 }, { "epoch": 0.3605546247055681, "grad_norm": 0.7734375, "learning_rate": 0.00016143536121859803, "loss": 2.4051, "step": 928 }, { "epoch": 0.3609431533959836, "grad_norm": 0.7734375, "learning_rate": 0.0001613282285071272, "loss": 2.3786, "step": 929 }, { "epoch": 0.36133168208639904, "grad_norm": 0.79296875, "learning_rate": 0.00016122098285297132, "loss": 2.4234, "step": 930 }, { "epoch": 0.36172021077681454, "grad_norm": 0.8125, "learning_rate": 0.00016111362445363512, "loss": 2.3992, "step": 931 }, { "epoch": 0.36210873946723005, "grad_norm": 0.71484375, "learning_rate": 0.0001610061535068309, "loss": 2.3751, "step": 932 }, { "epoch": 0.3624972681576455, "grad_norm": 0.8046875, "learning_rate": 0.00016089857021047823, "loss": 2.4276, "step": 933 }, { "epoch": 0.362885796848061, "grad_norm": 0.82421875, "learning_rate": 0.0001607908747627036, "loss": 2.3446, "step": 934 }, { "epoch": 0.3632743255384765, "grad_norm": 0.95703125, "learning_rate": 0.00016068306736184004, "loss": 2.3987, "step": 935 }, { "epoch": 0.36366285422889194, "grad_norm": 0.76953125, "learning_rate": 0.00016057514820642677, "loss": 2.4452, "step": 936 }, { "epoch": 0.36405138291930744, "grad_norm": 0.8515625, "learning_rate": 0.00016046711749520876, "loss": 2.4444, "step": 937 }, { "epoch": 0.36443991160972294, "grad_norm": 0.78515625, "learning_rate": 0.00016035897542713648, "loss": 2.3775, "step": 938 }, { "epoch": 0.3648284403001384, "grad_norm": 0.8671875, "learning_rate": 0.00016025072220136543, "loss": 2.4025, "step": 939 }, { "epoch": 0.3652169689905539, "grad_norm": 0.81640625, "learning_rate": 0.00016014235801725587, "loss": 2.4327, "step": 940 }, { "epoch": 0.3656054976809694, "grad_norm": 0.82421875, "learning_rate": 0.00016003388307437238, "loss": 2.4564, "step": 941 }, { "epoch": 0.36599402637138484, "grad_norm": 0.84765625, "learning_rate": 0.00015992529757248347, "loss": 2.4282, "step": 942 }, { "epoch": 0.36638255506180034, "grad_norm": 0.7578125, "learning_rate": 0.00015981660171156134, "loss": 2.3523, "step": 943 }, { "epoch": 0.36677108375221584, "grad_norm": 0.78125, "learning_rate": 0.00015970779569178138, "loss": 2.4013, "step": 944 }, { "epoch": 0.3671596124426313, "grad_norm": 0.78515625, "learning_rate": 0.00015959887971352184, "loss": 2.3586, "step": 945 }, { "epoch": 0.3675481411330468, "grad_norm": 0.84375, "learning_rate": 0.00015948985397736348, "loss": 2.4387, "step": 946 }, { "epoch": 0.3679366698234623, "grad_norm": 0.8125, "learning_rate": 0.00015938071868408922, "loss": 2.3555, "step": 947 }, { "epoch": 0.36832519851387774, "grad_norm": 0.75, "learning_rate": 0.00015927147403468369, "loss": 2.3974, "step": 948 }, { "epoch": 0.36871372720429324, "grad_norm": 0.7890625, "learning_rate": 0.00015916212023033298, "loss": 2.4057, "step": 949 }, { "epoch": 0.36910225589470874, "grad_norm": 0.75, "learning_rate": 0.00015905265747242413, "loss": 2.4402, "step": 950 }, { "epoch": 0.3694907845851242, "grad_norm": 0.75390625, "learning_rate": 0.00015894308596254485, "loss": 2.4567, "step": 951 }, { "epoch": 0.3698793132755397, "grad_norm": 0.80078125, "learning_rate": 0.00015883340590248314, "loss": 2.4268, "step": 952 }, { "epoch": 0.3702678419659552, "grad_norm": 0.80078125, "learning_rate": 0.00015872361749422694, "loss": 2.379, "step": 953 }, { "epoch": 0.37065637065637064, "grad_norm": 0.75390625, "learning_rate": 0.0001586137209399637, "loss": 2.4159, "step": 954 }, { "epoch": 0.37104489934678614, "grad_norm": 0.80859375, "learning_rate": 0.00015850371644207995, "loss": 2.4021, "step": 955 }, { "epoch": 0.37143342803720164, "grad_norm": 0.734375, "learning_rate": 0.00015839360420316116, "loss": 2.4081, "step": 956 }, { "epoch": 0.3718219567276171, "grad_norm": 0.78515625, "learning_rate": 0.00015828338442599112, "loss": 2.4259, "step": 957 }, { "epoch": 0.3722104854180326, "grad_norm": 0.76953125, "learning_rate": 0.00015817305731355167, "loss": 2.4069, "step": 958 }, { "epoch": 0.3725990141084481, "grad_norm": 0.77734375, "learning_rate": 0.00015806262306902234, "loss": 2.4049, "step": 959 }, { "epoch": 0.37298754279886354, "grad_norm": 0.8125, "learning_rate": 0.00015795208189577995, "loss": 2.412, "step": 960 }, { "epoch": 0.37337607148927904, "grad_norm": 0.796875, "learning_rate": 0.00015784143399739822, "loss": 2.4012, "step": 961 }, { "epoch": 0.37376460017969454, "grad_norm": 0.8515625, "learning_rate": 0.00015773067957764752, "loss": 2.3994, "step": 962 }, { "epoch": 0.37415312887011, "grad_norm": 0.80078125, "learning_rate": 0.0001576198188404942, "loss": 2.4166, "step": 963 }, { "epoch": 0.3745416575605255, "grad_norm": 0.7421875, "learning_rate": 0.0001575088519901006, "loss": 2.3468, "step": 964 }, { "epoch": 0.374930186250941, "grad_norm": 0.76953125, "learning_rate": 0.0001573977792308243, "loss": 2.4512, "step": 965 }, { "epoch": 0.37531871494135643, "grad_norm": 0.7734375, "learning_rate": 0.00015728660076721812, "loss": 2.3884, "step": 966 }, { "epoch": 0.37570724363177194, "grad_norm": 0.83203125, "learning_rate": 0.00015717531680402934, "loss": 2.4164, "step": 967 }, { "epoch": 0.37609577232218744, "grad_norm": 0.78515625, "learning_rate": 0.00015706392754619974, "loss": 2.4216, "step": 968 }, { "epoch": 0.3764843010126029, "grad_norm": 0.91015625, "learning_rate": 0.00015695243319886483, "loss": 2.3632, "step": 969 }, { "epoch": 0.3768728297030184, "grad_norm": 0.828125, "learning_rate": 0.00015684083396735372, "loss": 2.3983, "step": 970 }, { "epoch": 0.3772613583934339, "grad_norm": 0.796875, "learning_rate": 0.0001567291300571887, "loss": 2.4777, "step": 971 }, { "epoch": 0.37764988708384933, "grad_norm": 0.8203125, "learning_rate": 0.00015661732167408484, "loss": 2.431, "step": 972 }, { "epoch": 0.37803841577426484, "grad_norm": 0.8984375, "learning_rate": 0.00015650540902394954, "loss": 2.486, "step": 973 }, { "epoch": 0.37842694446468034, "grad_norm": 0.78125, "learning_rate": 0.00015639339231288233, "loss": 2.4245, "step": 974 }, { "epoch": 0.3788154731550958, "grad_norm": 0.7578125, "learning_rate": 0.00015628127174717427, "loss": 2.3979, "step": 975 }, { "epoch": 0.3792040018455113, "grad_norm": 0.75, "learning_rate": 0.00015616904753330775, "loss": 2.3735, "step": 976 }, { "epoch": 0.3795925305359268, "grad_norm": 0.77734375, "learning_rate": 0.00015605671987795595, "loss": 2.4248, "step": 977 }, { "epoch": 0.37998105922634223, "grad_norm": 0.7578125, "learning_rate": 0.00015594428898798272, "loss": 2.4653, "step": 978 }, { "epoch": 0.38036958791675773, "grad_norm": 0.77734375, "learning_rate": 0.00015583175507044185, "loss": 2.4554, "step": 979 }, { "epoch": 0.38075811660717324, "grad_norm": 0.8671875, "learning_rate": 0.00015571911833257696, "loss": 2.4347, "step": 980 }, { "epoch": 0.3811466452975887, "grad_norm": 0.78515625, "learning_rate": 0.000155606378981821, "loss": 2.4075, "step": 981 }, { "epoch": 0.3815351739880042, "grad_norm": 0.6953125, "learning_rate": 0.00015549353722579592, "loss": 2.3812, "step": 982 }, { "epoch": 0.3819237026784197, "grad_norm": 0.6953125, "learning_rate": 0.0001553805932723122, "loss": 2.2825, "step": 983 }, { "epoch": 0.38231223136883513, "grad_norm": 0.78515625, "learning_rate": 0.00015526754732936862, "loss": 2.4434, "step": 984 }, { "epoch": 0.38270076005925063, "grad_norm": 0.7578125, "learning_rate": 0.00015515439960515166, "loss": 2.3885, "step": 985 }, { "epoch": 0.38308928874966613, "grad_norm": 0.7578125, "learning_rate": 0.00015504115030803535, "loss": 2.3675, "step": 986 }, { "epoch": 0.3834778174400816, "grad_norm": 0.75, "learning_rate": 0.00015492779964658074, "loss": 2.3827, "step": 987 }, { "epoch": 0.3838663461304971, "grad_norm": 0.859375, "learning_rate": 0.00015481434782953555, "loss": 2.4087, "step": 988 }, { "epoch": 0.3842548748209126, "grad_norm": 0.86328125, "learning_rate": 0.00015470079506583377, "loss": 2.4245, "step": 989 }, { "epoch": 0.38464340351132803, "grad_norm": 0.7890625, "learning_rate": 0.00015458714156459536, "loss": 2.376, "step": 990 }, { "epoch": 0.38503193220174353, "grad_norm": 0.90625, "learning_rate": 0.00015447338753512573, "loss": 2.3831, "step": 991 }, { "epoch": 0.385420460892159, "grad_norm": 0.92578125, "learning_rate": 0.00015435953318691543, "loss": 2.4737, "step": 992 }, { "epoch": 0.3858089895825745, "grad_norm": 0.796875, "learning_rate": 0.00015424557872963982, "loss": 2.4105, "step": 993 }, { "epoch": 0.38619751827299, "grad_norm": 0.89453125, "learning_rate": 0.00015413152437315856, "loss": 2.4475, "step": 994 }, { "epoch": 0.3865860469634054, "grad_norm": 0.78125, "learning_rate": 0.00015401737032751532, "loss": 2.455, "step": 995 }, { "epoch": 0.38697457565382093, "grad_norm": 0.734375, "learning_rate": 0.00015390311680293727, "loss": 2.3793, "step": 996 }, { "epoch": 0.38736310434423643, "grad_norm": 0.78515625, "learning_rate": 0.00015378876400983494, "loss": 2.406, "step": 997 }, { "epoch": 0.3877516330346519, "grad_norm": 0.8203125, "learning_rate": 0.00015367431215880157, "loss": 2.4224, "step": 998 }, { "epoch": 0.3881401617250674, "grad_norm": 1.015625, "learning_rate": 0.00015355976146061285, "loss": 2.4179, "step": 999 }, { "epoch": 0.3885286904154829, "grad_norm": 0.80859375, "learning_rate": 0.00015344511212622648, "loss": 2.4217, "step": 1000 }, { "epoch": 0.3889172191058983, "grad_norm": 0.80859375, "learning_rate": 0.00015333036436678183, "loss": 2.4036, "step": 1001 }, { "epoch": 0.38930574779631383, "grad_norm": 0.78125, "learning_rate": 0.00015321551839359953, "loss": 2.472, "step": 1002 }, { "epoch": 0.38969427648672933, "grad_norm": 0.87890625, "learning_rate": 0.00015310057441818113, "loss": 2.4167, "step": 1003 }, { "epoch": 0.3900828051771448, "grad_norm": 0.7890625, "learning_rate": 0.00015298553265220854, "loss": 2.4069, "step": 1004 }, { "epoch": 0.3904713338675603, "grad_norm": 0.76953125, "learning_rate": 0.0001528703933075439, "loss": 2.3354, "step": 1005 }, { "epoch": 0.3908598625579758, "grad_norm": 0.74609375, "learning_rate": 0.00015275515659622898, "loss": 2.3972, "step": 1006 }, { "epoch": 0.3912483912483912, "grad_norm": 0.79296875, "learning_rate": 0.00015263982273048488, "loss": 2.4468, "step": 1007 }, { "epoch": 0.3916369199388067, "grad_norm": 0.7890625, "learning_rate": 0.00015252439192271156, "loss": 2.4281, "step": 1008 }, { "epoch": 0.39202544862922223, "grad_norm": 0.78125, "learning_rate": 0.0001524088643854876, "loss": 2.3852, "step": 1009 }, { "epoch": 0.3924139773196377, "grad_norm": 0.97265625, "learning_rate": 0.00015229324033156968, "loss": 2.4179, "step": 1010 }, { "epoch": 0.3928025060100532, "grad_norm": 0.83984375, "learning_rate": 0.00015217751997389223, "loss": 2.4206, "step": 1011 }, { "epoch": 0.3931910347004687, "grad_norm": 0.76953125, "learning_rate": 0.00015206170352556703, "loss": 2.3465, "step": 1012 }, { "epoch": 0.3935795633908841, "grad_norm": 0.76171875, "learning_rate": 0.0001519457911998828, "loss": 2.3244, "step": 1013 }, { "epoch": 0.3939680920812996, "grad_norm": 0.796875, "learning_rate": 0.00015182978321030482, "loss": 2.3715, "step": 1014 }, { "epoch": 0.3943566207717151, "grad_norm": 0.77734375, "learning_rate": 0.00015171367977047465, "loss": 2.396, "step": 1015 }, { "epoch": 0.3947451494621306, "grad_norm": 0.8828125, "learning_rate": 0.00015159748109420956, "loss": 2.4217, "step": 1016 }, { "epoch": 0.3951336781525461, "grad_norm": 0.7265625, "learning_rate": 0.00015148118739550213, "loss": 2.4046, "step": 1017 }, { "epoch": 0.3955222068429616, "grad_norm": 0.76171875, "learning_rate": 0.00015136479888852006, "loss": 2.3725, "step": 1018 }, { "epoch": 0.395910735533377, "grad_norm": 0.85546875, "learning_rate": 0.00015124831578760558, "loss": 2.3309, "step": 1019 }, { "epoch": 0.3962992642237925, "grad_norm": 0.8125, "learning_rate": 0.00015113173830727514, "loss": 2.4652, "step": 1020 }, { "epoch": 0.396687792914208, "grad_norm": 0.74609375, "learning_rate": 0.00015101506666221904, "loss": 2.3596, "step": 1021 }, { "epoch": 0.3970763216046235, "grad_norm": 0.73046875, "learning_rate": 0.00015089830106730095, "loss": 2.311, "step": 1022 }, { "epoch": 0.397464850295039, "grad_norm": 0.7578125, "learning_rate": 0.00015078144173755754, "loss": 2.3798, "step": 1023 }, { "epoch": 0.3978533789854545, "grad_norm": 0.734375, "learning_rate": 0.00015066448888819816, "loss": 2.3415, "step": 1024 }, { "epoch": 0.3982419076758699, "grad_norm": 0.78125, "learning_rate": 0.00015054744273460436, "loss": 2.4462, "step": 1025 }, { "epoch": 0.3986304363662854, "grad_norm": 0.75390625, "learning_rate": 0.00015043030349232947, "loss": 2.3352, "step": 1026 }, { "epoch": 0.3990189650567009, "grad_norm": 0.75390625, "learning_rate": 0.00015031307137709833, "loss": 2.3445, "step": 1027 }, { "epoch": 0.39940749374711637, "grad_norm": 0.73828125, "learning_rate": 0.00015019574660480684, "loss": 2.342, "step": 1028 }, { "epoch": 0.3997960224375319, "grad_norm": 0.8203125, "learning_rate": 0.00015007832939152143, "loss": 2.4521, "step": 1029 }, { "epoch": 0.4001845511279474, "grad_norm": 0.78125, "learning_rate": 0.00014996081995347885, "loss": 2.3904, "step": 1030 }, { "epoch": 0.4005730798183628, "grad_norm": 0.7890625, "learning_rate": 0.00014984321850708562, "loss": 2.4335, "step": 1031 }, { "epoch": 0.4009616085087783, "grad_norm": 0.75390625, "learning_rate": 0.00014972552526891782, "loss": 2.4618, "step": 1032 }, { "epoch": 0.4013501371991938, "grad_norm": 0.76171875, "learning_rate": 0.00014960774045572045, "loss": 2.4655, "step": 1033 }, { "epoch": 0.40173866588960927, "grad_norm": 0.703125, "learning_rate": 0.00014948986428440727, "loss": 2.2844, "step": 1034 }, { "epoch": 0.40212719458002477, "grad_norm": 0.734375, "learning_rate": 0.0001493718969720602, "loss": 2.3762, "step": 1035 }, { "epoch": 0.4025157232704403, "grad_norm": 0.78125, "learning_rate": 0.00014925383873592895, "loss": 2.4462, "step": 1036 }, { "epoch": 0.4029042519608557, "grad_norm": 0.73828125, "learning_rate": 0.0001491356897934309, "loss": 2.4108, "step": 1037 }, { "epoch": 0.4032927806512712, "grad_norm": 0.79296875, "learning_rate": 0.00014901745036215023, "loss": 2.4457, "step": 1038 }, { "epoch": 0.4036813093416867, "grad_norm": 0.74609375, "learning_rate": 0.00014889912065983794, "loss": 2.2958, "step": 1039 }, { "epoch": 0.40406983803210217, "grad_norm": 0.76171875, "learning_rate": 0.0001487807009044112, "loss": 2.3535, "step": 1040 }, { "epoch": 0.40445836672251767, "grad_norm": 0.75390625, "learning_rate": 0.00014866219131395297, "loss": 2.403, "step": 1041 }, { "epoch": 0.40484689541293317, "grad_norm": 0.8515625, "learning_rate": 0.00014854359210671173, "loss": 2.397, "step": 1042 }, { "epoch": 0.4052354241033486, "grad_norm": 0.76953125, "learning_rate": 0.00014842490350110103, "loss": 2.4236, "step": 1043 }, { "epoch": 0.4056239527937641, "grad_norm": 0.84375, "learning_rate": 0.00014830612571569896, "loss": 2.3644, "step": 1044 }, { "epoch": 0.4060124814841796, "grad_norm": 0.74609375, "learning_rate": 0.00014818725896924786, "loss": 2.4009, "step": 1045 }, { "epoch": 0.40640101017459507, "grad_norm": 0.73046875, "learning_rate": 0.000148068303480654, "loss": 2.3757, "step": 1046 }, { "epoch": 0.40678953886501057, "grad_norm": 0.796875, "learning_rate": 0.000147949259468987, "loss": 2.3917, "step": 1047 }, { "epoch": 0.40717806755542607, "grad_norm": 0.8046875, "learning_rate": 0.00014783012715347945, "loss": 2.4178, "step": 1048 }, { "epoch": 0.4075665962458415, "grad_norm": 0.84765625, "learning_rate": 0.00014771090675352665, "loss": 2.412, "step": 1049 }, { "epoch": 0.407955124936257, "grad_norm": 0.7578125, "learning_rate": 0.00014759159848868613, "loss": 2.4158, "step": 1050 }, { "epoch": 0.4083436536266725, "grad_norm": 0.7578125, "learning_rate": 0.00014747220257867717, "loss": 2.3423, "step": 1051 }, { "epoch": 0.40873218231708797, "grad_norm": 0.80859375, "learning_rate": 0.0001473527192433805, "loss": 2.3611, "step": 1052 }, { "epoch": 0.40912071100750347, "grad_norm": 0.79296875, "learning_rate": 0.0001472331487028378, "loss": 2.369, "step": 1053 }, { "epoch": 0.40950923969791897, "grad_norm": 0.7578125, "learning_rate": 0.00014711349117725144, "loss": 2.3826, "step": 1054 }, { "epoch": 0.4098977683883344, "grad_norm": 0.76171875, "learning_rate": 0.0001469937468869839, "loss": 2.4114, "step": 1055 }, { "epoch": 0.4102862970787499, "grad_norm": 0.7734375, "learning_rate": 0.00014687391605255746, "loss": 2.408, "step": 1056 }, { "epoch": 0.4106748257691654, "grad_norm": 0.75390625, "learning_rate": 0.0001467539988946538, "loss": 2.3509, "step": 1057 }, { "epoch": 0.41106335445958087, "grad_norm": 0.8125, "learning_rate": 0.00014663399563411358, "loss": 2.4604, "step": 1058 }, { "epoch": 0.41145188314999637, "grad_norm": 0.75, "learning_rate": 0.00014651390649193598, "loss": 2.409, "step": 1059 }, { "epoch": 0.4118404118404118, "grad_norm": 0.7890625, "learning_rate": 0.00014639373168927845, "loss": 2.4373, "step": 1060 }, { "epoch": 0.4122289405308273, "grad_norm": 0.82421875, "learning_rate": 0.0001462734714474561, "loss": 2.4317, "step": 1061 }, { "epoch": 0.4126174692212428, "grad_norm": 0.78125, "learning_rate": 0.00014615312598794135, "loss": 2.4266, "step": 1062 }, { "epoch": 0.41300599791165826, "grad_norm": 0.74609375, "learning_rate": 0.00014603269553236365, "loss": 2.3566, "step": 1063 }, { "epoch": 0.41339452660207376, "grad_norm": 0.84375, "learning_rate": 0.00014591218030250892, "loss": 2.4233, "step": 1064 }, { "epoch": 0.41378305529248927, "grad_norm": 0.73828125, "learning_rate": 0.0001457915805203193, "loss": 2.4206, "step": 1065 }, { "epoch": 0.4141715839829047, "grad_norm": 0.71875, "learning_rate": 0.00014567089640789247, "loss": 2.3427, "step": 1066 }, { "epoch": 0.4145601126733202, "grad_norm": 0.7421875, "learning_rate": 0.00014555012818748156, "loss": 2.3342, "step": 1067 }, { "epoch": 0.4149486413637357, "grad_norm": 0.734375, "learning_rate": 0.00014542927608149456, "loss": 2.3007, "step": 1068 }, { "epoch": 0.41533717005415116, "grad_norm": 0.78515625, "learning_rate": 0.0001453083403124939, "loss": 2.3456, "step": 1069 }, { "epoch": 0.41572569874456666, "grad_norm": 0.921875, "learning_rate": 0.00014518732110319613, "loss": 2.3655, "step": 1070 }, { "epoch": 0.41611422743498216, "grad_norm": 0.73828125, "learning_rate": 0.00014506621867647142, "loss": 2.3755, "step": 1071 }, { "epoch": 0.4165027561253976, "grad_norm": 0.74609375, "learning_rate": 0.00014494503325534322, "loss": 2.3675, "step": 1072 }, { "epoch": 0.4168912848158131, "grad_norm": 0.84375, "learning_rate": 0.0001448237650629879, "loss": 2.4362, "step": 1073 }, { "epoch": 0.4172798135062286, "grad_norm": 0.78515625, "learning_rate": 0.0001447024143227341, "loss": 2.3739, "step": 1074 }, { "epoch": 0.41766834219664406, "grad_norm": 0.8203125, "learning_rate": 0.0001445809812580626, "loss": 2.3791, "step": 1075 }, { "epoch": 0.41805687088705956, "grad_norm": 0.74609375, "learning_rate": 0.00014445946609260578, "loss": 2.4234, "step": 1076 }, { "epoch": 0.41844539957747506, "grad_norm": 0.8671875, "learning_rate": 0.00014433786905014716, "loss": 2.4474, "step": 1077 }, { "epoch": 0.4188339282678905, "grad_norm": 0.87109375, "learning_rate": 0.00014421619035462116, "loss": 2.4105, "step": 1078 }, { "epoch": 0.419222456958306, "grad_norm": 0.765625, "learning_rate": 0.00014409443023011238, "loss": 2.435, "step": 1079 }, { "epoch": 0.4196109856487215, "grad_norm": 0.89453125, "learning_rate": 0.00014397258890085554, "loss": 2.4015, "step": 1080 }, { "epoch": 0.41999951433913696, "grad_norm": 0.84765625, "learning_rate": 0.00014385066659123487, "loss": 2.3862, "step": 1081 }, { "epoch": 0.42038804302955246, "grad_norm": 0.84765625, "learning_rate": 0.00014372866352578375, "loss": 2.432, "step": 1082 }, { "epoch": 0.42077657171996796, "grad_norm": 0.765625, "learning_rate": 0.00014360657992918422, "loss": 2.3696, "step": 1083 }, { "epoch": 0.4211651004103834, "grad_norm": 0.828125, "learning_rate": 0.0001434844160262667, "loss": 2.3393, "step": 1084 }, { "epoch": 0.4215536291007989, "grad_norm": 0.7734375, "learning_rate": 0.00014336217204200942, "loss": 2.4385, "step": 1085 }, { "epoch": 0.4219421577912144, "grad_norm": 0.78515625, "learning_rate": 0.0001432398482015382, "loss": 2.3572, "step": 1086 }, { "epoch": 0.42233068648162986, "grad_norm": 0.828125, "learning_rate": 0.0001431174447301258, "loss": 2.4231, "step": 1087 }, { "epoch": 0.42271921517204536, "grad_norm": 0.7578125, "learning_rate": 0.0001429949618531917, "loss": 2.3236, "step": 1088 }, { "epoch": 0.42310774386246086, "grad_norm": 0.8046875, "learning_rate": 0.00014287239979630164, "loss": 2.3515, "step": 1089 }, { "epoch": 0.4234962725528763, "grad_norm": 0.7734375, "learning_rate": 0.0001427497587851671, "loss": 2.3919, "step": 1090 }, { "epoch": 0.4238848012432918, "grad_norm": 0.8671875, "learning_rate": 0.00014262703904564504, "loss": 2.3854, "step": 1091 }, { "epoch": 0.4242733299337073, "grad_norm": 0.79296875, "learning_rate": 0.00014250424080373736, "loss": 2.4189, "step": 1092 }, { "epoch": 0.42466185862412276, "grad_norm": 0.8125, "learning_rate": 0.0001423813642855905, "loss": 2.4222, "step": 1093 }, { "epoch": 0.42505038731453826, "grad_norm": 0.76171875, "learning_rate": 0.00014225840971749518, "loss": 2.3939, "step": 1094 }, { "epoch": 0.42543891600495376, "grad_norm": 0.75390625, "learning_rate": 0.0001421353773258857, "loss": 2.4524, "step": 1095 }, { "epoch": 0.4258274446953692, "grad_norm": 0.8125, "learning_rate": 0.0001420122673373398, "loss": 2.3831, "step": 1096 }, { "epoch": 0.4262159733857847, "grad_norm": 0.88671875, "learning_rate": 0.00014188907997857804, "loss": 2.2944, "step": 1097 }, { "epoch": 0.4266045020762002, "grad_norm": 0.7734375, "learning_rate": 0.00014176581547646353, "loss": 2.3749, "step": 1098 }, { "epoch": 0.42699303076661566, "grad_norm": 0.7578125, "learning_rate": 0.00014164247405800144, "loss": 2.3957, "step": 1099 }, { "epoch": 0.42738155945703116, "grad_norm": 0.73828125, "learning_rate": 0.00014151905595033852, "loss": 2.3632, "step": 1100 }, { "epoch": 0.42777008814744666, "grad_norm": 0.8984375, "learning_rate": 0.00014139556138076286, "loss": 2.384, "step": 1101 }, { "epoch": 0.4281586168378621, "grad_norm": 0.76953125, "learning_rate": 0.00014127199057670326, "loss": 2.4353, "step": 1102 }, { "epoch": 0.4285471455282776, "grad_norm": 0.75390625, "learning_rate": 0.00014114834376572897, "loss": 2.3899, "step": 1103 }, { "epoch": 0.4289356742186931, "grad_norm": 0.77734375, "learning_rate": 0.00014102462117554924, "loss": 2.4308, "step": 1104 }, { "epoch": 0.42932420290910855, "grad_norm": 0.7890625, "learning_rate": 0.0001409008230340128, "loss": 2.4151, "step": 1105 }, { "epoch": 0.42971273159952406, "grad_norm": 0.734375, "learning_rate": 0.0001407769495691076, "loss": 2.3521, "step": 1106 }, { "epoch": 0.43010126028993956, "grad_norm": 0.7734375, "learning_rate": 0.00014065300100896022, "loss": 2.3728, "step": 1107 }, { "epoch": 0.430489788980355, "grad_norm": 0.76953125, "learning_rate": 0.0001405289775818356, "loss": 2.4092, "step": 1108 }, { "epoch": 0.4308783176707705, "grad_norm": 0.76953125, "learning_rate": 0.00014040487951613658, "loss": 2.3365, "step": 1109 }, { "epoch": 0.431266846361186, "grad_norm": 0.796875, "learning_rate": 0.0001402807070404033, "loss": 2.3233, "step": 1110 }, { "epoch": 0.43165537505160145, "grad_norm": 0.7890625, "learning_rate": 0.00014015646038331313, "loss": 2.3724, "step": 1111 }, { "epoch": 0.43204390374201695, "grad_norm": 0.80859375, "learning_rate": 0.00014003213977367994, "loss": 2.4238, "step": 1112 }, { "epoch": 0.43243243243243246, "grad_norm": 0.8359375, "learning_rate": 0.0001399077454404539, "loss": 2.4004, "step": 1113 }, { "epoch": 0.4328209611228479, "grad_norm": 0.78125, "learning_rate": 0.00013978327761272072, "loss": 2.4084, "step": 1114 }, { "epoch": 0.4332094898132634, "grad_norm": 0.75390625, "learning_rate": 0.00013965873651970175, "loss": 2.3828, "step": 1115 }, { "epoch": 0.4335980185036789, "grad_norm": 0.703125, "learning_rate": 0.00013953412239075302, "loss": 2.3548, "step": 1116 }, { "epoch": 0.43398654719409435, "grad_norm": 0.80859375, "learning_rate": 0.00013940943545536524, "loss": 2.4108, "step": 1117 }, { "epoch": 0.43437507588450985, "grad_norm": 0.7265625, "learning_rate": 0.0001392846759431631, "loss": 2.3684, "step": 1118 }, { "epoch": 0.43476360457492536, "grad_norm": 0.828125, "learning_rate": 0.00013915984408390496, "loss": 2.3744, "step": 1119 }, { "epoch": 0.4351521332653408, "grad_norm": 0.7890625, "learning_rate": 0.00013903494010748246, "loss": 2.3971, "step": 1120 }, { "epoch": 0.4355406619557563, "grad_norm": 0.79296875, "learning_rate": 0.00013890996424392006, "loss": 2.4105, "step": 1121 }, { "epoch": 0.4359291906461718, "grad_norm": 0.7265625, "learning_rate": 0.0001387849167233745, "loss": 2.2925, "step": 1122 }, { "epoch": 0.43631771933658725, "grad_norm": 0.75, "learning_rate": 0.00013865979777613458, "loss": 2.3574, "step": 1123 }, { "epoch": 0.43670624802700275, "grad_norm": 0.78125, "learning_rate": 0.00013853460763262062, "loss": 2.3376, "step": 1124 }, { "epoch": 0.43709477671741825, "grad_norm": 0.76953125, "learning_rate": 0.00013840934652338405, "loss": 2.4241, "step": 1125 }, { "epoch": 0.4374833054078337, "grad_norm": 0.7890625, "learning_rate": 0.00013828401467910704, "loss": 2.3839, "step": 1126 }, { "epoch": 0.4378718340982492, "grad_norm": 0.7734375, "learning_rate": 0.00013815861233060193, "loss": 2.3961, "step": 1127 }, { "epoch": 0.43826036278866465, "grad_norm": 0.73828125, "learning_rate": 0.00013803313970881092, "loss": 2.3902, "step": 1128 }, { "epoch": 0.43864889147908015, "grad_norm": 0.85546875, "learning_rate": 0.00013790759704480573, "loss": 2.4001, "step": 1129 }, { "epoch": 0.43903742016949565, "grad_norm": 0.8046875, "learning_rate": 0.00013778198456978696, "loss": 2.3353, "step": 1130 }, { "epoch": 0.4394259488599111, "grad_norm": 0.78125, "learning_rate": 0.00013765630251508386, "loss": 2.3685, "step": 1131 }, { "epoch": 0.4398144775503266, "grad_norm": 0.73828125, "learning_rate": 0.00013753055111215368, "loss": 2.3767, "step": 1132 }, { "epoch": 0.4402030062407421, "grad_norm": 0.8359375, "learning_rate": 0.00013740473059258154, "loss": 2.417, "step": 1133 }, { "epoch": 0.44059153493115755, "grad_norm": 0.8359375, "learning_rate": 0.00013727884118807976, "loss": 2.4301, "step": 1134 }, { "epoch": 0.44098006362157305, "grad_norm": 0.75, "learning_rate": 0.00013715288313048758, "loss": 2.4261, "step": 1135 }, { "epoch": 0.44136859231198855, "grad_norm": 0.78125, "learning_rate": 0.00013702685665177054, "loss": 2.4326, "step": 1136 }, { "epoch": 0.441757121002404, "grad_norm": 0.91796875, "learning_rate": 0.00013690076198402036, "loss": 2.3514, "step": 1137 }, { "epoch": 0.4421456496928195, "grad_norm": 0.94921875, "learning_rate": 0.00013677459935945425, "loss": 2.4182, "step": 1138 }, { "epoch": 0.442534178383235, "grad_norm": 0.7734375, "learning_rate": 0.00013664836901041452, "loss": 2.4883, "step": 1139 }, { "epoch": 0.44292270707365045, "grad_norm": 0.75390625, "learning_rate": 0.00013652207116936828, "loss": 2.3386, "step": 1140 }, { "epoch": 0.44331123576406595, "grad_norm": 0.74609375, "learning_rate": 0.00013639570606890694, "loss": 2.3897, "step": 1141 }, { "epoch": 0.44369976445448145, "grad_norm": 0.8046875, "learning_rate": 0.00013626927394174568, "loss": 2.3805, "step": 1142 }, { "epoch": 0.4440882931448969, "grad_norm": 0.80078125, "learning_rate": 0.00013614277502072326, "loss": 2.358, "step": 1143 }, { "epoch": 0.4444768218353124, "grad_norm": 0.84765625, "learning_rate": 0.0001360162095388013, "loss": 2.3745, "step": 1144 }, { "epoch": 0.4448653505257279, "grad_norm": 0.72265625, "learning_rate": 0.00013588957772906412, "loss": 2.3668, "step": 1145 }, { "epoch": 0.44525387921614334, "grad_norm": 0.8046875, "learning_rate": 0.00013576287982471812, "loss": 2.3509, "step": 1146 }, { "epoch": 0.44564240790655885, "grad_norm": 0.74609375, "learning_rate": 0.00013563611605909141, "loss": 2.3841, "step": 1147 }, { "epoch": 0.44603093659697435, "grad_norm": 0.8125, "learning_rate": 0.00013550928666563347, "loss": 2.3929, "step": 1148 }, { "epoch": 0.4464194652873898, "grad_norm": 0.734375, "learning_rate": 0.0001353823918779146, "loss": 2.377, "step": 1149 }, { "epoch": 0.4468079939778053, "grad_norm": 0.7265625, "learning_rate": 0.00013525543192962548, "loss": 2.4199, "step": 1150 }, { "epoch": 0.4471965226682208, "grad_norm": 0.7734375, "learning_rate": 0.00013512840705457685, "loss": 2.4202, "step": 1151 }, { "epoch": 0.44758505135863624, "grad_norm": 0.7421875, "learning_rate": 0.000135001317486699, "loss": 2.3966, "step": 1152 }, { "epoch": 0.44797358004905175, "grad_norm": 0.70703125, "learning_rate": 0.00013487416346004137, "loss": 2.4289, "step": 1153 }, { "epoch": 0.44836210873946725, "grad_norm": 0.75, "learning_rate": 0.00013474694520877208, "loss": 2.3513, "step": 1154 }, { "epoch": 0.4487506374298827, "grad_norm": 0.79296875, "learning_rate": 0.0001346196629671776, "loss": 2.3925, "step": 1155 }, { "epoch": 0.4491391661202982, "grad_norm": 0.88671875, "learning_rate": 0.00013449231696966208, "loss": 2.4184, "step": 1156 }, { "epoch": 0.4495276948107137, "grad_norm": 0.75, "learning_rate": 0.00013436490745074735, "loss": 2.364, "step": 1157 }, { "epoch": 0.44991622350112914, "grad_norm": 0.734375, "learning_rate": 0.00013423743464507194, "loss": 2.3693, "step": 1158 }, { "epoch": 0.45030475219154464, "grad_norm": 0.84765625, "learning_rate": 0.0001341098987873911, "loss": 2.3543, "step": 1159 }, { "epoch": 0.45069328088196015, "grad_norm": 0.7578125, "learning_rate": 0.00013398230011257614, "loss": 2.426, "step": 1160 }, { "epoch": 0.4510818095723756, "grad_norm": 0.77734375, "learning_rate": 0.00013385463885561412, "loss": 2.3451, "step": 1161 }, { "epoch": 0.4514703382627911, "grad_norm": 0.7265625, "learning_rate": 0.00013372691525160725, "loss": 2.3361, "step": 1162 }, { "epoch": 0.4518588669532066, "grad_norm": 0.89453125, "learning_rate": 0.0001335991295357726, "loss": 2.4234, "step": 1163 }, { "epoch": 0.45224739564362204, "grad_norm": 0.78125, "learning_rate": 0.00013347128194344168, "loss": 2.3376, "step": 1164 }, { "epoch": 0.45263592433403754, "grad_norm": 0.7265625, "learning_rate": 0.0001333433727100599, "loss": 2.3383, "step": 1165 }, { "epoch": 0.45302445302445304, "grad_norm": 1.3203125, "learning_rate": 0.00013321540207118614, "loss": 2.4261, "step": 1166 }, { "epoch": 0.4534129817148685, "grad_norm": 0.81640625, "learning_rate": 0.00013308737026249248, "loss": 2.4405, "step": 1167 }, { "epoch": 0.453801510405284, "grad_norm": 0.76953125, "learning_rate": 0.00013295927751976358, "loss": 2.356, "step": 1168 }, { "epoch": 0.4541900390956995, "grad_norm": 0.8515625, "learning_rate": 0.00013283112407889633, "loss": 2.4666, "step": 1169 }, { "epoch": 0.45457856778611494, "grad_norm": 0.76953125, "learning_rate": 0.00013270291017589936, "loss": 2.4644, "step": 1170 }, { "epoch": 0.45496709647653044, "grad_norm": 0.796875, "learning_rate": 0.00013257463604689275, "loss": 2.3546, "step": 1171 }, { "epoch": 0.45535562516694594, "grad_norm": 0.86328125, "learning_rate": 0.0001324463019281074, "loss": 2.4329, "step": 1172 }, { "epoch": 0.4557441538573614, "grad_norm": 0.75390625, "learning_rate": 0.00013231790805588468, "loss": 2.3792, "step": 1173 }, { "epoch": 0.4561326825477769, "grad_norm": 0.7890625, "learning_rate": 0.0001321894546666761, "loss": 2.352, "step": 1174 }, { "epoch": 0.4565212112381924, "grad_norm": 0.76953125, "learning_rate": 0.00013206094199704274, "loss": 2.3812, "step": 1175 }, { "epoch": 0.45690973992860784, "grad_norm": 0.72265625, "learning_rate": 0.00013193237028365478, "loss": 2.3032, "step": 1176 }, { "epoch": 0.45729826861902334, "grad_norm": 0.6953125, "learning_rate": 0.00013180373976329118, "loss": 2.3755, "step": 1177 }, { "epoch": 0.45768679730943884, "grad_norm": 0.69921875, "learning_rate": 0.00013167505067283926, "loss": 2.4432, "step": 1178 }, { "epoch": 0.4580753259998543, "grad_norm": 0.66015625, "learning_rate": 0.0001315463032492941, "loss": 2.2993, "step": 1179 }, { "epoch": 0.4584638546902698, "grad_norm": 0.71484375, "learning_rate": 0.00013141749772975825, "loss": 2.3378, "step": 1180 }, { "epoch": 0.4588523833806853, "grad_norm": 14.75, "learning_rate": 0.00013128863435144127, "loss": 2.37, "step": 1181 }, { "epoch": 0.45924091207110074, "grad_norm": 0.79296875, "learning_rate": 0.00013115971335165926, "loss": 2.3849, "step": 1182 }, { "epoch": 0.45962944076151624, "grad_norm": 0.8046875, "learning_rate": 0.00013103073496783447, "loss": 2.4525, "step": 1183 }, { "epoch": 0.46001796945193174, "grad_norm": 0.78515625, "learning_rate": 0.00013090169943749476, "loss": 2.3965, "step": 1184 }, { "epoch": 0.4604064981423472, "grad_norm": 0.74609375, "learning_rate": 0.00013077260699827326, "loss": 2.4148, "step": 1185 }, { "epoch": 0.4607950268327627, "grad_norm": 0.68359375, "learning_rate": 0.00013064345788790788, "loss": 2.3294, "step": 1186 }, { "epoch": 0.4611835555231782, "grad_norm": 0.70703125, "learning_rate": 0.00013051425234424105, "loss": 2.4082, "step": 1187 }, { "epoch": 0.46157208421359364, "grad_norm": 0.74609375, "learning_rate": 0.00013038499060521886, "loss": 2.4042, "step": 1188 }, { "epoch": 0.46196061290400914, "grad_norm": 0.83203125, "learning_rate": 0.00013025567290889112, "loss": 2.4304, "step": 1189 }, { "epoch": 0.46234914159442464, "grad_norm": 0.8046875, "learning_rate": 0.00013012629949341053, "loss": 2.3919, "step": 1190 }, { "epoch": 0.4627376702848401, "grad_norm": 0.7109375, "learning_rate": 0.00012999687059703257, "loss": 2.3465, "step": 1191 }, { "epoch": 0.4631261989752556, "grad_norm": 0.75, "learning_rate": 0.0001298673864581147, "loss": 2.3778, "step": 1192 }, { "epoch": 0.4635147276656711, "grad_norm": 0.7890625, "learning_rate": 0.00012973784731511637, "loss": 2.4215, "step": 1193 }, { "epoch": 0.46390325635608654, "grad_norm": 0.7734375, "learning_rate": 0.00012960825340659803, "loss": 2.3797, "step": 1194 }, { "epoch": 0.46429178504650204, "grad_norm": 0.78125, "learning_rate": 0.00012947860497122119, "loss": 2.4168, "step": 1195 }, { "epoch": 0.4646803137369175, "grad_norm": 0.7421875, "learning_rate": 0.0001293489022477477, "loss": 2.3733, "step": 1196 }, { "epoch": 0.465068842427333, "grad_norm": 0.78515625, "learning_rate": 0.00012921914547503943, "loss": 2.3437, "step": 1197 }, { "epoch": 0.4654573711177485, "grad_norm": 0.75390625, "learning_rate": 0.00012908933489205775, "loss": 2.3236, "step": 1198 }, { "epoch": 0.46584589980816393, "grad_norm": 0.76171875, "learning_rate": 0.00012895947073786313, "loss": 2.4033, "step": 1199 }, { "epoch": 0.46623442849857943, "grad_norm": 0.74609375, "learning_rate": 0.00012882955325161472, "loss": 2.3573, "step": 1200 }, { "epoch": 0.46662295718899494, "grad_norm": 0.703125, "learning_rate": 0.00012869958267256988, "loss": 2.3784, "step": 1201 }, { "epoch": 0.4670114858794104, "grad_norm": 0.734375, "learning_rate": 0.00012856955924008375, "loss": 2.3903, "step": 1202 }, { "epoch": 0.4674000145698259, "grad_norm": 0.7265625, "learning_rate": 0.00012843948319360874, "loss": 2.3779, "step": 1203 }, { "epoch": 0.4677885432602414, "grad_norm": 0.7890625, "learning_rate": 0.00012830935477269425, "loss": 2.4628, "step": 1204 }, { "epoch": 0.46817707195065683, "grad_norm": 0.765625, "learning_rate": 0.00012817917421698613, "loss": 2.3718, "step": 1205 }, { "epoch": 0.46856560064107233, "grad_norm": 0.73828125, "learning_rate": 0.00012804894176622617, "loss": 2.368, "step": 1206 }, { "epoch": 0.46895412933148783, "grad_norm": 0.72265625, "learning_rate": 0.00012791865766025176, "loss": 2.4149, "step": 1207 }, { "epoch": 0.4693426580219033, "grad_norm": 0.76171875, "learning_rate": 0.0001277883221389954, "loss": 2.3687, "step": 1208 }, { "epoch": 0.4697311867123188, "grad_norm": 0.7734375, "learning_rate": 0.00012765793544248442, "loss": 2.4661, "step": 1209 }, { "epoch": 0.4701197154027343, "grad_norm": 0.73046875, "learning_rate": 0.0001275274978108401, "loss": 2.3438, "step": 1210 }, { "epoch": 0.47050824409314973, "grad_norm": 0.73046875, "learning_rate": 0.00012739700948427785, "loss": 2.4013, "step": 1211 }, { "epoch": 0.47089677278356523, "grad_norm": 0.73046875, "learning_rate": 0.00012726647070310622, "loss": 2.4636, "step": 1212 }, { "epoch": 0.47128530147398073, "grad_norm": 0.7734375, "learning_rate": 0.00012713588170772675, "loss": 2.4221, "step": 1213 }, { "epoch": 0.4716738301643962, "grad_norm": 0.70703125, "learning_rate": 0.00012700524273863347, "loss": 2.4232, "step": 1214 }, { "epoch": 0.4720623588548117, "grad_norm": 0.8046875, "learning_rate": 0.00012687455403641242, "loss": 2.3794, "step": 1215 }, { "epoch": 0.4724508875452272, "grad_norm": 0.74609375, "learning_rate": 0.00012674381584174124, "loss": 2.4058, "step": 1216 }, { "epoch": 0.47283941623564263, "grad_norm": 0.78515625, "learning_rate": 0.00012661302839538866, "loss": 2.3517, "step": 1217 }, { "epoch": 0.47322794492605813, "grad_norm": 0.75, "learning_rate": 0.00012648219193821424, "loss": 2.4763, "step": 1218 }, { "epoch": 0.47361647361647363, "grad_norm": 0.75390625, "learning_rate": 0.00012635130671116772, "loss": 2.3852, "step": 1219 }, { "epoch": 0.4740050023068891, "grad_norm": 0.87890625, "learning_rate": 0.00012622037295528858, "loss": 2.3484, "step": 1220 }, { "epoch": 0.4743935309973046, "grad_norm": 0.72265625, "learning_rate": 0.00012608939091170577, "loss": 2.3919, "step": 1221 }, { "epoch": 0.4747820596877201, "grad_norm": 0.71484375, "learning_rate": 0.00012595836082163718, "loss": 2.4426, "step": 1222 }, { "epoch": 0.4751705883781355, "grad_norm": 0.69921875, "learning_rate": 0.00012582728292638912, "loss": 2.3485, "step": 1223 }, { "epoch": 0.47555911706855103, "grad_norm": 0.72265625, "learning_rate": 0.000125696157467356, "loss": 2.4109, "step": 1224 }, { "epoch": 0.47594764575896653, "grad_norm": 0.76953125, "learning_rate": 0.00012556498468601975, "loss": 2.4374, "step": 1225 }, { "epoch": 0.476336174449382, "grad_norm": 0.76171875, "learning_rate": 0.0001254337648239495, "loss": 2.3384, "step": 1226 }, { "epoch": 0.4767247031397975, "grad_norm": 0.734375, "learning_rate": 0.00012530249812280108, "loss": 2.3056, "step": 1227 }, { "epoch": 0.477113231830213, "grad_norm": 0.765625, "learning_rate": 0.0001251711848243166, "loss": 2.416, "step": 1228 }, { "epoch": 0.4775017605206284, "grad_norm": 0.73046875, "learning_rate": 0.00012503982517032388, "loss": 2.3606, "step": 1229 }, { "epoch": 0.47789028921104393, "grad_norm": 0.734375, "learning_rate": 0.00012490841940273627, "loss": 2.4288, "step": 1230 }, { "epoch": 0.47827881790145943, "grad_norm": 0.76171875, "learning_rate": 0.0001247769677635519, "loss": 2.4206, "step": 1231 }, { "epoch": 0.4786673465918749, "grad_norm": 0.71484375, "learning_rate": 0.00012464547049485347, "loss": 2.3527, "step": 1232 }, { "epoch": 0.4790558752822904, "grad_norm": 0.75390625, "learning_rate": 0.00012451392783880766, "loss": 2.4884, "step": 1233 }, { "epoch": 0.4794444039727059, "grad_norm": 0.765625, "learning_rate": 0.00012438234003766478, "loss": 2.4246, "step": 1234 }, { "epoch": 0.4798329326631213, "grad_norm": 0.7578125, "learning_rate": 0.0001242507073337582, "loss": 2.4027, "step": 1235 }, { "epoch": 0.4802214613535368, "grad_norm": 0.73828125, "learning_rate": 0.00012411902996950407, "loss": 2.3954, "step": 1236 }, { "epoch": 0.48060999004395233, "grad_norm": 0.76171875, "learning_rate": 0.00012398730818740077, "loss": 2.4057, "step": 1237 }, { "epoch": 0.4809985187343678, "grad_norm": 0.75, "learning_rate": 0.00012385554223002845, "loss": 2.3871, "step": 1238 }, { "epoch": 0.4813870474247833, "grad_norm": 0.71484375, "learning_rate": 0.0001237237323400486, "loss": 2.4078, "step": 1239 }, { "epoch": 0.4817755761151988, "grad_norm": 0.75, "learning_rate": 0.00012359187876020367, "loss": 2.4764, "step": 1240 }, { "epoch": 0.4821641048056142, "grad_norm": 0.703125, "learning_rate": 0.0001234599817333166, "loss": 2.3753, "step": 1241 }, { "epoch": 0.4825526334960297, "grad_norm": 0.73828125, "learning_rate": 0.00012332804150229018, "loss": 2.3435, "step": 1242 }, { "epoch": 0.4829411621864452, "grad_norm": 0.703125, "learning_rate": 0.00012319605831010694, "loss": 2.3939, "step": 1243 }, { "epoch": 0.4833296908768607, "grad_norm": 0.78515625, "learning_rate": 0.00012306403239982844, "loss": 2.4263, "step": 1244 }, { "epoch": 0.4837182195672762, "grad_norm": 0.8203125, "learning_rate": 0.00012293196401459494, "loss": 2.4282, "step": 1245 }, { "epoch": 0.4841067482576917, "grad_norm": 0.78515625, "learning_rate": 0.0001227998533976249, "loss": 2.3939, "step": 1246 }, { "epoch": 0.4844952769481071, "grad_norm": 0.84765625, "learning_rate": 0.00012266770079221457, "loss": 2.4431, "step": 1247 }, { "epoch": 0.4848838056385226, "grad_norm": 0.765625, "learning_rate": 0.00012253550644173754, "loss": 2.3905, "step": 1248 }, { "epoch": 0.4852723343289381, "grad_norm": 0.78125, "learning_rate": 0.00012240327058964424, "loss": 2.372, "step": 1249 }, { "epoch": 0.4856608630193536, "grad_norm": 0.76953125, "learning_rate": 0.00012227099347946155, "loss": 2.4248, "step": 1250 }, { "epoch": 0.4860493917097691, "grad_norm": 0.734375, "learning_rate": 0.00012213867535479234, "loss": 2.3693, "step": 1251 }, { "epoch": 0.4864379204001846, "grad_norm": 0.7109375, "learning_rate": 0.000122006316459315, "loss": 2.3079, "step": 1252 }, { "epoch": 0.4868264490906, "grad_norm": 0.80078125, "learning_rate": 0.00012187391703678301, "loss": 2.3852, "step": 1253 }, { "epoch": 0.4872149777810155, "grad_norm": 0.75390625, "learning_rate": 0.00012174147733102448, "loss": 2.409, "step": 1254 }, { "epoch": 0.487603506471431, "grad_norm": 0.84765625, "learning_rate": 0.00012160899758594176, "loss": 2.3169, "step": 1255 }, { "epoch": 0.48799203516184647, "grad_norm": 0.78125, "learning_rate": 0.00012147647804551078, "loss": 2.4159, "step": 1256 }, { "epoch": 0.488380563852262, "grad_norm": 0.765625, "learning_rate": 0.00012134391895378097, "loss": 2.3722, "step": 1257 }, { "epoch": 0.4887690925426775, "grad_norm": 0.8203125, "learning_rate": 0.00012121132055487441, "loss": 2.3752, "step": 1258 }, { "epoch": 0.4891576212330929, "grad_norm": 0.796875, "learning_rate": 0.00012107868309298574, "loss": 2.3253, "step": 1259 }, { "epoch": 0.4895461499235084, "grad_norm": 0.8046875, "learning_rate": 0.00012094600681238135, "loss": 2.3405, "step": 1260 }, { "epoch": 0.4899346786139239, "grad_norm": 0.765625, "learning_rate": 0.00012081329195739928, "loss": 2.3236, "step": 1261 }, { "epoch": 0.49032320730433937, "grad_norm": 0.6953125, "learning_rate": 0.00012068053877244853, "loss": 2.408, "step": 1262 }, { "epoch": 0.49071173599475487, "grad_norm": 0.69921875, "learning_rate": 0.0001205477475020087, "loss": 2.3682, "step": 1263 }, { "epoch": 0.4911002646851703, "grad_norm": 0.90625, "learning_rate": 0.0001204149183906296, "loss": 2.4261, "step": 1264 }, { "epoch": 0.4914887933755858, "grad_norm": 0.85546875, "learning_rate": 0.00012028205168293056, "loss": 2.3513, "step": 1265 }, { "epoch": 0.4918773220660013, "grad_norm": 0.70703125, "learning_rate": 0.0001201491476236003, "loss": 2.4025, "step": 1266 }, { "epoch": 0.49226585075641677, "grad_norm": 0.8046875, "learning_rate": 0.00012001620645739629, "loss": 2.3808, "step": 1267 }, { "epoch": 0.49265437944683227, "grad_norm": 0.76953125, "learning_rate": 0.0001198832284291443, "loss": 2.4421, "step": 1268 }, { "epoch": 0.49304290813724777, "grad_norm": 0.81640625, "learning_rate": 0.00011975021378373802, "loss": 2.4174, "step": 1269 }, { "epoch": 0.4934314368276632, "grad_norm": 0.71875, "learning_rate": 0.00011961716276613857, "loss": 2.3233, "step": 1270 }, { "epoch": 0.4938199655180787, "grad_norm": 0.73046875, "learning_rate": 0.00011948407562137404, "loss": 2.4112, "step": 1271 }, { "epoch": 0.4942084942084942, "grad_norm": 0.7734375, "learning_rate": 0.0001193509525945391, "loss": 2.4136, "step": 1272 }, { "epoch": 0.49459702289890967, "grad_norm": 0.72265625, "learning_rate": 0.00011921779393079438, "loss": 2.3525, "step": 1273 }, { "epoch": 0.49498555158932517, "grad_norm": 0.734375, "learning_rate": 0.00011908459987536629, "loss": 2.3849, "step": 1274 }, { "epoch": 0.49537408027974067, "grad_norm": 0.74609375, "learning_rate": 0.0001189513706735463, "loss": 2.4242, "step": 1275 }, { "epoch": 0.4957626089701561, "grad_norm": 0.73046875, "learning_rate": 0.00011881810657069068, "loss": 2.3559, "step": 1276 }, { "epoch": 0.4961511376605716, "grad_norm": 0.7734375, "learning_rate": 0.00011868480781221995, "loss": 2.3649, "step": 1277 }, { "epoch": 0.4965396663509871, "grad_norm": 0.73828125, "learning_rate": 0.00011855147464361845, "loss": 2.2891, "step": 1278 }, { "epoch": 0.49692819504140257, "grad_norm": 0.73828125, "learning_rate": 0.00011841810731043386, "loss": 2.4437, "step": 1279 }, { "epoch": 0.49731672373181807, "grad_norm": 0.78125, "learning_rate": 0.00011828470605827682, "loss": 2.3526, "step": 1280 }, { "epoch": 0.49770525242223357, "grad_norm": 0.78515625, "learning_rate": 0.00011815127113282047, "loss": 2.3752, "step": 1281 }, { "epoch": 0.498093781112649, "grad_norm": 0.73046875, "learning_rate": 0.00011801780277979986, "loss": 2.3949, "step": 1282 }, { "epoch": 0.4984823098030645, "grad_norm": 0.75390625, "learning_rate": 0.00011788430124501167, "loss": 2.4448, "step": 1283 }, { "epoch": 0.49887083849348, "grad_norm": 0.7578125, "learning_rate": 0.0001177507667743137, "loss": 2.3877, "step": 1284 }, { "epoch": 0.49925936718389546, "grad_norm": 0.8359375, "learning_rate": 0.00011761719961362434, "loss": 2.3495, "step": 1285 }, { "epoch": 0.49964789587431097, "grad_norm": 0.85546875, "learning_rate": 0.00011748360000892227, "loss": 2.4588, "step": 1286 }, { "epoch": 0.5000364245647264, "grad_norm": 0.734375, "learning_rate": 0.00011734996820624581, "loss": 2.3902, "step": 1287 }, { "epoch": 0.5004249532551419, "grad_norm": 0.76171875, "learning_rate": 0.0001172163044516927, "loss": 2.3359, "step": 1288 }, { "epoch": 0.5008134819455574, "grad_norm": 0.70703125, "learning_rate": 0.00011708260899141943, "loss": 2.3574, "step": 1289 }, { "epoch": 0.5012020106359729, "grad_norm": 0.703125, "learning_rate": 0.00011694888207164091, "loss": 2.3206, "step": 1290 }, { "epoch": 0.5015905393263884, "grad_norm": 0.7578125, "learning_rate": 0.00011681512393862999, "loss": 2.4011, "step": 1291 }, { "epoch": 0.5019790680168039, "grad_norm": 0.69140625, "learning_rate": 0.00011668133483871699, "loss": 2.3868, "step": 1292 }, { "epoch": 0.5023675967072193, "grad_norm": 0.73828125, "learning_rate": 0.00011654751501828927, "loss": 2.373, "step": 1293 }, { "epoch": 0.5027561253976348, "grad_norm": 0.71875, "learning_rate": 0.00011641366472379078, "loss": 2.3772, "step": 1294 }, { "epoch": 0.5031446540880503, "grad_norm": 0.77734375, "learning_rate": 0.00011627978420172156, "loss": 2.4444, "step": 1295 }, { "epoch": 0.5035331827784658, "grad_norm": 0.71875, "learning_rate": 0.00011614587369863737, "loss": 2.3138, "step": 1296 }, { "epoch": 0.5039217114688813, "grad_norm": 0.6953125, "learning_rate": 0.00011601193346114905, "loss": 2.2997, "step": 1297 }, { "epoch": 0.5043102401592967, "grad_norm": 0.671875, "learning_rate": 0.00011587796373592237, "loss": 2.3125, "step": 1298 }, { "epoch": 0.5046987688497122, "grad_norm": 0.73046875, "learning_rate": 0.00011574396476967732, "loss": 2.423, "step": 1299 }, { "epoch": 0.5050872975401277, "grad_norm": 0.703125, "learning_rate": 0.00011560993680918774, "loss": 2.3215, "step": 1300 }, { "epoch": 0.5054758262305432, "grad_norm": 0.72265625, "learning_rate": 0.00011547588010128088, "loss": 2.3576, "step": 1301 }, { "epoch": 0.5058643549209587, "grad_norm": 0.76953125, "learning_rate": 0.0001153417948928369, "loss": 2.3201, "step": 1302 }, { "epoch": 0.5062528836113742, "grad_norm": 0.765625, "learning_rate": 0.00011520768143078853, "loss": 2.4282, "step": 1303 }, { "epoch": 0.5066414123017896, "grad_norm": 0.77734375, "learning_rate": 0.00011507353996212043, "loss": 2.3862, "step": 1304 }, { "epoch": 0.5070299409922051, "grad_norm": 0.76953125, "learning_rate": 0.00011493937073386889, "loss": 2.3773, "step": 1305 }, { "epoch": 0.5074184696826206, "grad_norm": 0.7421875, "learning_rate": 0.00011480517399312134, "loss": 2.4023, "step": 1306 }, { "epoch": 0.5078069983730361, "grad_norm": 0.73828125, "learning_rate": 0.00011467094998701582, "loss": 2.3434, "step": 1307 }, { "epoch": 0.5081955270634516, "grad_norm": 0.75, "learning_rate": 0.00011453669896274066, "loss": 2.3864, "step": 1308 }, { "epoch": 0.5085840557538671, "grad_norm": 0.765625, "learning_rate": 0.00011440242116753384, "loss": 2.3932, "step": 1309 }, { "epoch": 0.5089725844442825, "grad_norm": 0.71875, "learning_rate": 0.00011426811684868275, "loss": 2.3176, "step": 1310 }, { "epoch": 0.509361113134698, "grad_norm": 0.76171875, "learning_rate": 0.00011413378625352358, "loss": 2.4461, "step": 1311 }, { "epoch": 0.5097496418251135, "grad_norm": 0.7265625, "learning_rate": 0.0001139994296294409, "loss": 2.3435, "step": 1312 }, { "epoch": 0.510138170515529, "grad_norm": 0.7265625, "learning_rate": 0.0001138650472238672, "loss": 2.329, "step": 1313 }, { "epoch": 0.5105266992059445, "grad_norm": 0.69140625, "learning_rate": 0.00011373063928428252, "loss": 2.3996, "step": 1314 }, { "epoch": 0.51091522789636, "grad_norm": 0.74609375, "learning_rate": 0.00011359620605821384, "loss": 2.4047, "step": 1315 }, { "epoch": 0.5113037565867754, "grad_norm": 0.71875, "learning_rate": 0.0001134617477932348, "loss": 2.4164, "step": 1316 }, { "epoch": 0.5116922852771909, "grad_norm": 0.73828125, "learning_rate": 0.00011332726473696505, "loss": 2.326, "step": 1317 }, { "epoch": 0.5120808139676064, "grad_norm": 0.70703125, "learning_rate": 0.00011319275713706996, "loss": 2.3532, "step": 1318 }, { "epoch": 0.5124693426580219, "grad_norm": 0.73828125, "learning_rate": 0.00011305822524126007, "loss": 2.3886, "step": 1319 }, { "epoch": 0.5128578713484374, "grad_norm": 0.7421875, "learning_rate": 0.0001129236692972907, "loss": 2.4516, "step": 1320 }, { "epoch": 0.5132464000388529, "grad_norm": 0.7265625, "learning_rate": 0.00011278908955296143, "loss": 2.371, "step": 1321 }, { "epoch": 0.5136349287292683, "grad_norm": 0.71875, "learning_rate": 0.00011265448625611568, "loss": 2.3791, "step": 1322 }, { "epoch": 0.5140234574196838, "grad_norm": 0.69140625, "learning_rate": 0.00011251985965464022, "loss": 2.4764, "step": 1323 }, { "epoch": 0.5144119861100993, "grad_norm": 0.6953125, "learning_rate": 0.00011238520999646479, "loss": 2.4049, "step": 1324 }, { "epoch": 0.5148005148005148, "grad_norm": 0.76171875, "learning_rate": 0.00011225053752956152, "loss": 2.3209, "step": 1325 }, { "epoch": 0.5151890434909303, "grad_norm": 0.6796875, "learning_rate": 0.00011211584250194463, "loss": 2.3245, "step": 1326 }, { "epoch": 0.5155775721813458, "grad_norm": 0.77734375, "learning_rate": 0.0001119811251616698, "loss": 2.3636, "step": 1327 }, { "epoch": 0.5159661008717612, "grad_norm": 0.69140625, "learning_rate": 0.00011184638575683388, "loss": 2.3771, "step": 1328 }, { "epoch": 0.5163546295621767, "grad_norm": 0.75390625, "learning_rate": 0.00011171162453557431, "loss": 2.4129, "step": 1329 }, { "epoch": 0.5167431582525922, "grad_norm": 0.76171875, "learning_rate": 0.00011157684174606872, "loss": 2.3208, "step": 1330 }, { "epoch": 0.5171316869430077, "grad_norm": 0.703125, "learning_rate": 0.00011144203763653443, "loss": 2.4374, "step": 1331 }, { "epoch": 0.5175202156334232, "grad_norm": 0.74609375, "learning_rate": 0.00011130721245522808, "loss": 2.4326, "step": 1332 }, { "epoch": 0.5179087443238387, "grad_norm": 0.69921875, "learning_rate": 0.00011117236645044506, "loss": 2.3765, "step": 1333 }, { "epoch": 0.5182972730142541, "grad_norm": 0.6953125, "learning_rate": 0.00011103749987051916, "loss": 2.3262, "step": 1334 }, { "epoch": 0.5186858017046696, "grad_norm": 0.703125, "learning_rate": 0.00011090261296382202, "loss": 2.3944, "step": 1335 }, { "epoch": 0.5190743303950851, "grad_norm": 0.6796875, "learning_rate": 0.00011076770597876272, "loss": 2.3092, "step": 1336 }, { "epoch": 0.5194628590855006, "grad_norm": 0.70703125, "learning_rate": 0.00011063277916378736, "loss": 2.3485, "step": 1337 }, { "epoch": 0.5198513877759161, "grad_norm": 0.765625, "learning_rate": 0.0001104978327673785, "loss": 2.4069, "step": 1338 }, { "epoch": 0.5202399164663316, "grad_norm": 0.7421875, "learning_rate": 0.00011036286703805478, "loss": 2.4546, "step": 1339 }, { "epoch": 0.520628445156747, "grad_norm": 0.71875, "learning_rate": 0.00011022788222437048, "loss": 2.3471, "step": 1340 }, { "epoch": 0.5210169738471625, "grad_norm": 0.71875, "learning_rate": 0.00011009287857491497, "loss": 2.4365, "step": 1341 }, { "epoch": 0.521405502537578, "grad_norm": 0.6796875, "learning_rate": 0.00010995785633831233, "loss": 2.358, "step": 1342 }, { "epoch": 0.5217940312279935, "grad_norm": 0.73828125, "learning_rate": 0.00010982281576322091, "loss": 2.3183, "step": 1343 }, { "epoch": 0.522182559918409, "grad_norm": 0.72265625, "learning_rate": 0.00010968775709833274, "loss": 2.4138, "step": 1344 }, { "epoch": 0.5225710886088245, "grad_norm": 0.73046875, "learning_rate": 0.00010955268059237327, "loss": 2.3831, "step": 1345 }, { "epoch": 0.5229596172992399, "grad_norm": 0.765625, "learning_rate": 0.00010941758649410075, "loss": 2.3526, "step": 1346 }, { "epoch": 0.5233481459896554, "grad_norm": 0.7265625, "learning_rate": 0.0001092824750523058, "loss": 2.3985, "step": 1347 }, { "epoch": 0.5237366746800709, "grad_norm": 0.73046875, "learning_rate": 0.00010914734651581108, "loss": 2.4037, "step": 1348 }, { "epoch": 0.5241252033704864, "grad_norm": 0.7578125, "learning_rate": 0.00010901220113347062, "loss": 2.4123, "step": 1349 }, { "epoch": 0.5245137320609019, "grad_norm": 0.7578125, "learning_rate": 0.00010887703915416951, "loss": 2.3917, "step": 1350 }, { "epoch": 0.5249022607513174, "grad_norm": 0.765625, "learning_rate": 0.00010874186082682345, "loss": 2.4087, "step": 1351 }, { "epoch": 0.5252907894417328, "grad_norm": 0.69140625, "learning_rate": 0.00010860666640037825, "loss": 2.3286, "step": 1352 }, { "epoch": 0.5256793181321483, "grad_norm": 0.74609375, "learning_rate": 0.00010847145612380923, "loss": 2.4452, "step": 1353 }, { "epoch": 0.5260678468225638, "grad_norm": 0.82421875, "learning_rate": 0.00010833623024612105, "loss": 2.3288, "step": 1354 }, { "epoch": 0.5264563755129793, "grad_norm": 0.76953125, "learning_rate": 0.00010820098901634705, "loss": 2.3248, "step": 1355 }, { "epoch": 0.5268449042033948, "grad_norm": 0.6953125, "learning_rate": 0.0001080657326835489, "loss": 2.3524, "step": 1356 }, { "epoch": 0.5272334328938103, "grad_norm": 0.73046875, "learning_rate": 0.00010793046149681593, "loss": 2.3483, "step": 1357 }, { "epoch": 0.5276219615842257, "grad_norm": 0.71875, "learning_rate": 0.00010779517570526499, "loss": 2.4338, "step": 1358 }, { "epoch": 0.5280104902746412, "grad_norm": 0.7109375, "learning_rate": 0.00010765987555803973, "loss": 2.4, "step": 1359 }, { "epoch": 0.5283990189650567, "grad_norm": 0.7578125, "learning_rate": 0.00010752456130431032, "loss": 2.3773, "step": 1360 }, { "epoch": 0.5287875476554722, "grad_norm": 0.734375, "learning_rate": 0.00010738923319327281, "loss": 2.333, "step": 1361 }, { "epoch": 0.5291760763458877, "grad_norm": 0.72265625, "learning_rate": 0.0001072538914741488, "loss": 2.4401, "step": 1362 }, { "epoch": 0.5295646050363032, "grad_norm": 0.73046875, "learning_rate": 0.00010711853639618497, "loss": 2.3724, "step": 1363 }, { "epoch": 0.5299531337267186, "grad_norm": 0.7421875, "learning_rate": 0.00010698316820865264, "loss": 2.43, "step": 1364 }, { "epoch": 0.5303416624171341, "grad_norm": 0.71484375, "learning_rate": 0.00010684778716084722, "loss": 2.3521, "step": 1365 }, { "epoch": 0.5307301911075496, "grad_norm": 0.71875, "learning_rate": 0.0001067123935020878, "loss": 2.4063, "step": 1366 }, { "epoch": 0.5311187197979651, "grad_norm": 0.6875, "learning_rate": 0.0001065769874817167, "loss": 2.3809, "step": 1367 }, { "epoch": 0.5315072484883806, "grad_norm": 0.7421875, "learning_rate": 0.00010644156934909901, "loss": 2.3949, "step": 1368 }, { "epoch": 0.531895777178796, "grad_norm": 0.80859375, "learning_rate": 0.00010630613935362217, "loss": 2.3989, "step": 1369 }, { "epoch": 0.5322843058692115, "grad_norm": 0.7109375, "learning_rate": 0.00010617069774469543, "loss": 2.4125, "step": 1370 }, { "epoch": 0.532672834559627, "grad_norm": 0.703125, "learning_rate": 0.00010603524477174937, "loss": 2.3935, "step": 1371 }, { "epoch": 0.5330613632500425, "grad_norm": 0.74609375, "learning_rate": 0.00010589978068423561, "loss": 2.4038, "step": 1372 }, { "epoch": 0.533449891940458, "grad_norm": 0.6953125, "learning_rate": 0.00010576430573162612, "loss": 2.3936, "step": 1373 }, { "epoch": 0.5338384206308735, "grad_norm": 0.6796875, "learning_rate": 0.00010562882016341303, "loss": 2.336, "step": 1374 }, { "epoch": 0.5342269493212889, "grad_norm": 0.68359375, "learning_rate": 0.00010549332422910782, "loss": 2.3207, "step": 1375 }, { "epoch": 0.5346154780117044, "grad_norm": 0.72265625, "learning_rate": 0.00010535781817824126, "loss": 2.3884, "step": 1376 }, { "epoch": 0.5350040067021199, "grad_norm": 0.69140625, "learning_rate": 0.0001052223022603626, "loss": 2.3599, "step": 1377 }, { "epoch": 0.5353925353925354, "grad_norm": 0.6875, "learning_rate": 0.00010508677672503942, "loss": 2.4099, "step": 1378 }, { "epoch": 0.5357810640829509, "grad_norm": 0.69921875, "learning_rate": 0.00010495124182185679, "loss": 2.4312, "step": 1379 }, { "epoch": 0.5361695927733664, "grad_norm": 0.6875, "learning_rate": 0.00010481569780041723, "loss": 2.3607, "step": 1380 }, { "epoch": 0.5365581214637818, "grad_norm": 0.71484375, "learning_rate": 0.00010468014491033995, "loss": 2.4015, "step": 1381 }, { "epoch": 0.5369466501541973, "grad_norm": 0.6796875, "learning_rate": 0.00010454458340126059, "loss": 2.3508, "step": 1382 }, { "epoch": 0.5373351788446128, "grad_norm": 0.71484375, "learning_rate": 0.0001044090135228305, "loss": 2.368, "step": 1383 }, { "epoch": 0.5377237075350283, "grad_norm": 0.71875, "learning_rate": 0.00010427343552471657, "loss": 2.4, "step": 1384 }, { "epoch": 0.5381122362254438, "grad_norm": 0.73828125, "learning_rate": 0.00010413784965660063, "loss": 2.4134, "step": 1385 }, { "epoch": 0.5385007649158593, "grad_norm": 0.6953125, "learning_rate": 0.00010400225616817895, "loss": 2.3331, "step": 1386 }, { "epoch": 0.5388892936062747, "grad_norm": 0.75, "learning_rate": 0.00010386665530916191, "loss": 2.4752, "step": 1387 }, { "epoch": 0.5392778222966902, "grad_norm": 0.69140625, "learning_rate": 0.00010373104732927341, "loss": 2.396, "step": 1388 }, { "epoch": 0.5396663509871057, "grad_norm": 0.71484375, "learning_rate": 0.00010359543247825045, "loss": 2.3771, "step": 1389 }, { "epoch": 0.5400548796775212, "grad_norm": 0.69921875, "learning_rate": 0.00010345981100584272, "loss": 2.3218, "step": 1390 }, { "epoch": 0.5404434083679367, "grad_norm": 0.67578125, "learning_rate": 0.0001033241831618121, "loss": 2.2933, "step": 1391 }, { "epoch": 0.5408319370583522, "grad_norm": 0.71875, "learning_rate": 0.00010318854919593221, "loss": 2.4244, "step": 1392 }, { "epoch": 0.5412204657487676, "grad_norm": 0.67578125, "learning_rate": 0.00010305290935798792, "loss": 2.3525, "step": 1393 }, { "epoch": 0.5416089944391831, "grad_norm": 0.68359375, "learning_rate": 0.00010291726389777489, "loss": 2.3758, "step": 1394 }, { "epoch": 0.5419975231295986, "grad_norm": 0.73828125, "learning_rate": 0.00010278161306509923, "loss": 2.3396, "step": 1395 }, { "epoch": 0.5423860518200141, "grad_norm": 0.7265625, "learning_rate": 0.00010264595710977687, "loss": 2.3817, "step": 1396 }, { "epoch": 0.5427745805104296, "grad_norm": 0.6796875, "learning_rate": 0.00010251029628163316, "loss": 2.4236, "step": 1397 }, { "epoch": 0.5431631092008451, "grad_norm": 0.6953125, "learning_rate": 0.00010237463083050245, "loss": 2.3765, "step": 1398 }, { "epoch": 0.5435516378912605, "grad_norm": 0.71875, "learning_rate": 0.0001022389610062276, "loss": 2.3717, "step": 1399 }, { "epoch": 0.543940166581676, "grad_norm": 0.73046875, "learning_rate": 0.00010210328705865962, "loss": 2.3726, "step": 1400 }, { "epoch": 0.5443286952720915, "grad_norm": 0.6640625, "learning_rate": 0.00010196760923765689, "loss": 2.3417, "step": 1401 }, { "epoch": 0.544717223962507, "grad_norm": 0.7421875, "learning_rate": 0.00010183192779308512, "loss": 2.3906, "step": 1402 }, { "epoch": 0.5451057526529225, "grad_norm": 0.71875, "learning_rate": 0.00010169624297481664, "loss": 2.3572, "step": 1403 }, { "epoch": 0.545494281343338, "grad_norm": 0.6953125, "learning_rate": 0.00010156055503272996, "loss": 2.3263, "step": 1404 }, { "epoch": 0.5458828100337534, "grad_norm": 0.76171875, "learning_rate": 0.00010142486421670941, "loss": 2.3674, "step": 1405 }, { "epoch": 0.5462713387241689, "grad_norm": 0.6875, "learning_rate": 0.00010128917077664448, "loss": 2.3493, "step": 1406 }, { "epoch": 0.5466598674145844, "grad_norm": 0.76171875, "learning_rate": 0.00010115347496242964, "loss": 2.4162, "step": 1407 }, { "epoch": 0.5470483961049999, "grad_norm": 0.73828125, "learning_rate": 0.00010101777702396367, "loss": 2.4186, "step": 1408 }, { "epoch": 0.5474369247954154, "grad_norm": 0.66796875, "learning_rate": 0.00010088207721114922, "loss": 2.365, "step": 1409 }, { "epoch": 0.5478254534858309, "grad_norm": 0.69921875, "learning_rate": 0.00010074637577389253, "loss": 2.3712, "step": 1410 }, { "epoch": 0.5482139821762463, "grad_norm": 0.67578125, "learning_rate": 0.0001006106729621026, "loss": 2.3686, "step": 1411 }, { "epoch": 0.5486025108666618, "grad_norm": 0.6953125, "learning_rate": 0.00010047496902569122, "loss": 2.3628, "step": 1412 }, { "epoch": 0.5489910395570773, "grad_norm": 0.66796875, "learning_rate": 0.00010033926421457207, "loss": 2.34, "step": 1413 }, { "epoch": 0.5493795682474928, "grad_norm": 0.76171875, "learning_rate": 0.00010020355877866054, "loss": 2.359, "step": 1414 }, { "epoch": 0.5497680969379083, "grad_norm": 0.69140625, "learning_rate": 0.0001000678529678731, "loss": 2.3546, "step": 1415 }, { "epoch": 0.5501566256283238, "grad_norm": 0.69140625, "learning_rate": 9.993214703212696e-05, "loss": 2.3523, "step": 1416 }, { "epoch": 0.5505451543187392, "grad_norm": 0.72265625, "learning_rate": 9.97964412213395e-05, "loss": 2.3794, "step": 1417 }, { "epoch": 0.5509336830091547, "grad_norm": 0.83984375, "learning_rate": 9.966073578542795e-05, "loss": 2.3968, "step": 1418 }, { "epoch": 0.5513222116995702, "grad_norm": 0.7421875, "learning_rate": 9.952503097430882e-05, "loss": 2.4061, "step": 1419 }, { "epoch": 0.5517107403899857, "grad_norm": 0.73046875, "learning_rate": 9.938932703789743e-05, "loss": 2.3675, "step": 1420 }, { "epoch": 0.5520992690804012, "grad_norm": 0.6953125, "learning_rate": 9.925362422610754e-05, "loss": 2.3192, "step": 1421 }, { "epoch": 0.5524877977708167, "grad_norm": 0.71484375, "learning_rate": 9.91179227888508e-05, "loss": 2.354, "step": 1422 }, { "epoch": 0.5528763264612321, "grad_norm": 0.67578125, "learning_rate": 9.898222297603639e-05, "loss": 2.3639, "step": 1423 }, { "epoch": 0.5532648551516476, "grad_norm": 0.7109375, "learning_rate": 9.884652503757041e-05, "loss": 2.4172, "step": 1424 }, { "epoch": 0.5536533838420631, "grad_norm": 0.69140625, "learning_rate": 9.871082922335554e-05, "loss": 2.3958, "step": 1425 }, { "epoch": 0.5540419125324786, "grad_norm": 0.6953125, "learning_rate": 9.857513578329063e-05, "loss": 2.3441, "step": 1426 }, { "epoch": 0.5544304412228941, "grad_norm": 0.65625, "learning_rate": 9.843944496727007e-05, "loss": 2.3684, "step": 1427 }, { "epoch": 0.5548189699133096, "grad_norm": 0.76171875, "learning_rate": 9.830375702518338e-05, "loss": 2.3754, "step": 1428 }, { "epoch": 0.555207498603725, "grad_norm": 0.67578125, "learning_rate": 9.81680722069149e-05, "loss": 2.3649, "step": 1429 }, { "epoch": 0.5555960272941405, "grad_norm": 0.69140625, "learning_rate": 9.803239076234312e-05, "loss": 2.4059, "step": 1430 }, { "epoch": 0.555984555984556, "grad_norm": 0.7109375, "learning_rate": 9.78967129413404e-05, "loss": 2.3236, "step": 1431 }, { "epoch": 0.5563730846749715, "grad_norm": 0.6953125, "learning_rate": 9.77610389937724e-05, "loss": 2.3588, "step": 1432 }, { "epoch": 0.556761613365387, "grad_norm": 0.74609375, "learning_rate": 9.762536916949759e-05, "loss": 2.4225, "step": 1433 }, { "epoch": 0.5571501420558025, "grad_norm": 0.73046875, "learning_rate": 9.748970371836688e-05, "loss": 2.4599, "step": 1434 }, { "epoch": 0.5575386707462179, "grad_norm": 0.66796875, "learning_rate": 9.735404289022316e-05, "loss": 2.2971, "step": 1435 }, { "epoch": 0.5579271994366334, "grad_norm": 0.6796875, "learning_rate": 9.721838693490079e-05, "loss": 2.2642, "step": 1436 }, { "epoch": 0.5583157281270489, "grad_norm": 0.671875, "learning_rate": 9.708273610222512e-05, "loss": 2.3131, "step": 1437 }, { "epoch": 0.5587042568174644, "grad_norm": 0.74609375, "learning_rate": 9.69470906420121e-05, "loss": 2.3757, "step": 1438 }, { "epoch": 0.5590927855078799, "grad_norm": 0.69921875, "learning_rate": 9.68114508040678e-05, "loss": 2.3271, "step": 1439 }, { "epoch": 0.5594813141982953, "grad_norm": 0.6953125, "learning_rate": 9.667581683818791e-05, "loss": 2.3897, "step": 1440 }, { "epoch": 0.5598698428887108, "grad_norm": 0.703125, "learning_rate": 9.65401889941573e-05, "loss": 2.4096, "step": 1441 }, { "epoch": 0.5602583715791263, "grad_norm": 0.67578125, "learning_rate": 9.640456752174957e-05, "loss": 2.3779, "step": 1442 }, { "epoch": 0.5606469002695418, "grad_norm": 0.69140625, "learning_rate": 9.626895267072662e-05, "loss": 2.3463, "step": 1443 }, { "epoch": 0.5610354289599573, "grad_norm": 0.67578125, "learning_rate": 9.613334469083811e-05, "loss": 2.2979, "step": 1444 }, { "epoch": 0.5614239576503728, "grad_norm": 0.65625, "learning_rate": 9.599774383182106e-05, "loss": 2.3747, "step": 1445 }, { "epoch": 0.5618124863407882, "grad_norm": 0.7578125, "learning_rate": 9.586215034339939e-05, "loss": 2.3214, "step": 1446 }, { "epoch": 0.5622010150312037, "grad_norm": 0.7578125, "learning_rate": 9.572656447528345e-05, "loss": 2.398, "step": 1447 }, { "epoch": 0.5625895437216192, "grad_norm": 0.765625, "learning_rate": 9.559098647716952e-05, "loss": 2.4058, "step": 1448 }, { "epoch": 0.5629780724120347, "grad_norm": 0.72265625, "learning_rate": 9.545541659873945e-05, "loss": 2.425, "step": 1449 }, { "epoch": 0.5633666011024502, "grad_norm": 0.71875, "learning_rate": 9.531985508966004e-05, "loss": 2.3954, "step": 1450 }, { "epoch": 0.5637551297928657, "grad_norm": 0.78515625, "learning_rate": 9.518430219958277e-05, "loss": 2.3367, "step": 1451 }, { "epoch": 0.5641436584832811, "grad_norm": 0.71484375, "learning_rate": 9.504875817814321e-05, "loss": 2.3757, "step": 1452 }, { "epoch": 0.5645321871736966, "grad_norm": 0.66796875, "learning_rate": 9.491322327496062e-05, "loss": 2.3418, "step": 1453 }, { "epoch": 0.5649207158641121, "grad_norm": 0.6875, "learning_rate": 9.477769773963738e-05, "loss": 2.3674, "step": 1454 }, { "epoch": 0.5653092445545276, "grad_norm": 0.73828125, "learning_rate": 9.464218182175875e-05, "loss": 2.3464, "step": 1455 }, { "epoch": 0.5656977732449431, "grad_norm": 0.72265625, "learning_rate": 9.450667577089217e-05, "loss": 2.3947, "step": 1456 }, { "epoch": 0.5660863019353586, "grad_norm": 0.70703125, "learning_rate": 9.437117983658701e-05, "loss": 2.3727, "step": 1457 }, { "epoch": 0.566474830625774, "grad_norm": 0.6875, "learning_rate": 9.423569426837388e-05, "loss": 2.4071, "step": 1458 }, { "epoch": 0.5668633593161895, "grad_norm": 0.71484375, "learning_rate": 9.41002193157644e-05, "loss": 2.3527, "step": 1459 }, { "epoch": 0.567251888006605, "grad_norm": 0.7578125, "learning_rate": 9.396475522825066e-05, "loss": 2.3901, "step": 1460 }, { "epoch": 0.5676404166970205, "grad_norm": 0.734375, "learning_rate": 9.382930225530462e-05, "loss": 2.4118, "step": 1461 }, { "epoch": 0.568028945387436, "grad_norm": 0.86328125, "learning_rate": 9.369386064637785e-05, "loss": 2.4239, "step": 1462 }, { "epoch": 0.5684174740778515, "grad_norm": 0.6796875, "learning_rate": 9.355843065090104e-05, "loss": 2.4176, "step": 1463 }, { "epoch": 0.5688060027682669, "grad_norm": 0.78515625, "learning_rate": 9.342301251828336e-05, "loss": 2.4052, "step": 1464 }, { "epoch": 0.5691945314586824, "grad_norm": 0.7109375, "learning_rate": 9.328760649791227e-05, "loss": 2.4543, "step": 1465 }, { "epoch": 0.5695830601490979, "grad_norm": 0.81640625, "learning_rate": 9.315221283915282e-05, "loss": 2.3569, "step": 1466 }, { "epoch": 0.5699715888395134, "grad_norm": 0.7109375, "learning_rate": 9.301683179134741e-05, "loss": 2.4172, "step": 1467 }, { "epoch": 0.5703601175299289, "grad_norm": 0.76953125, "learning_rate": 9.288146360381507e-05, "loss": 2.4128, "step": 1468 }, { "epoch": 0.5707486462203444, "grad_norm": 0.765625, "learning_rate": 9.274610852585127e-05, "loss": 2.347, "step": 1469 }, { "epoch": 0.5711371749107598, "grad_norm": 0.74609375, "learning_rate": 9.261076680672724e-05, "loss": 2.3291, "step": 1470 }, { "epoch": 0.5715257036011753, "grad_norm": 0.80859375, "learning_rate": 9.24754386956897e-05, "loss": 2.3808, "step": 1471 }, { "epoch": 0.5719142322915908, "grad_norm": 0.69921875, "learning_rate": 9.234012444196029e-05, "loss": 2.3653, "step": 1472 }, { "epoch": 0.5723027609820063, "grad_norm": 0.80078125, "learning_rate": 9.220482429473502e-05, "loss": 2.3972, "step": 1473 }, { "epoch": 0.5726912896724218, "grad_norm": 0.6640625, "learning_rate": 9.206953850318408e-05, "loss": 2.3442, "step": 1474 }, { "epoch": 0.5730798183628373, "grad_norm": 0.74609375, "learning_rate": 9.193426731645112e-05, "loss": 2.4275, "step": 1475 }, { "epoch": 0.5734683470532527, "grad_norm": 0.71875, "learning_rate": 9.179901098365297e-05, "loss": 2.3666, "step": 1476 }, { "epoch": 0.5738568757436682, "grad_norm": 0.71484375, "learning_rate": 9.166376975387898e-05, "loss": 2.3713, "step": 1477 }, { "epoch": 0.5742454044340837, "grad_norm": 0.74609375, "learning_rate": 9.152854387619081e-05, "loss": 2.399, "step": 1478 }, { "epoch": 0.5746339331244992, "grad_norm": 0.734375, "learning_rate": 9.139333359962179e-05, "loss": 2.35, "step": 1479 }, { "epoch": 0.5750224618149147, "grad_norm": 0.6484375, "learning_rate": 9.125813917317656e-05, "loss": 2.3904, "step": 1480 }, { "epoch": 0.5754109905053302, "grad_norm": 0.7421875, "learning_rate": 9.112296084583051e-05, "loss": 2.4128, "step": 1481 }, { "epoch": 0.5757995191957456, "grad_norm": 0.68359375, "learning_rate": 9.098779886652941e-05, "loss": 2.3246, "step": 1482 }, { "epoch": 0.5761880478861611, "grad_norm": 0.6875, "learning_rate": 9.085265348418894e-05, "loss": 2.3642, "step": 1483 }, { "epoch": 0.5765765765765766, "grad_norm": 0.67578125, "learning_rate": 9.071752494769422e-05, "loss": 2.3825, "step": 1484 }, { "epoch": 0.5769651052669921, "grad_norm": 0.6953125, "learning_rate": 9.058241350589929e-05, "loss": 2.3952, "step": 1485 }, { "epoch": 0.5773536339574076, "grad_norm": 0.69140625, "learning_rate": 9.044731940762675e-05, "loss": 2.3592, "step": 1486 }, { "epoch": 0.5777421626478231, "grad_norm": 0.69921875, "learning_rate": 9.031224290166727e-05, "loss": 2.4437, "step": 1487 }, { "epoch": 0.5781306913382385, "grad_norm": 0.7109375, "learning_rate": 9.017718423677911e-05, "loss": 2.4009, "step": 1488 }, { "epoch": 0.578519220028654, "grad_norm": 0.6796875, "learning_rate": 9.004214366168769e-05, "loss": 2.3155, "step": 1489 }, { "epoch": 0.5789077487190695, "grad_norm": 0.70703125, "learning_rate": 8.990712142508505e-05, "loss": 2.4467, "step": 1490 }, { "epoch": 0.579296277409485, "grad_norm": 0.734375, "learning_rate": 8.977211777562953e-05, "loss": 2.3164, "step": 1491 }, { "epoch": 0.5796848060999005, "grad_norm": 0.703125, "learning_rate": 8.963713296194521e-05, "loss": 2.3961, "step": 1492 }, { "epoch": 0.580073334790316, "grad_norm": 0.68359375, "learning_rate": 8.950216723262152e-05, "loss": 2.3545, "step": 1493 }, { "epoch": 0.5804618634807314, "grad_norm": 0.67578125, "learning_rate": 8.936722083621265e-05, "loss": 2.4041, "step": 1494 }, { "epoch": 0.5808503921711469, "grad_norm": 0.6875, "learning_rate": 8.923229402123728e-05, "loss": 2.3851, "step": 1495 }, { "epoch": 0.5812389208615624, "grad_norm": 0.74609375, "learning_rate": 8.909738703617799e-05, "loss": 2.3893, "step": 1496 }, { "epoch": 0.5816274495519779, "grad_norm": 0.765625, "learning_rate": 8.896250012948086e-05, "loss": 2.2755, "step": 1497 }, { "epoch": 0.5820159782423934, "grad_norm": 0.70703125, "learning_rate": 8.882763354955495e-05, "loss": 2.4155, "step": 1498 }, { "epoch": 0.5824045069328089, "grad_norm": 0.66015625, "learning_rate": 8.869278754477193e-05, "loss": 2.382, "step": 1499 }, { "epoch": 0.5827930356232243, "grad_norm": 0.65625, "learning_rate": 8.855796236346557e-05, "loss": 2.3534, "step": 1500 }, { "epoch": 0.5831815643136398, "grad_norm": 0.65234375, "learning_rate": 8.84231582539313e-05, "loss": 2.3147, "step": 1501 }, { "epoch": 0.5835700930040553, "grad_norm": 0.72265625, "learning_rate": 8.82883754644257e-05, "loss": 2.3962, "step": 1502 }, { "epoch": 0.5839586216944708, "grad_norm": 0.6640625, "learning_rate": 8.815361424316616e-05, "loss": 2.3999, "step": 1503 }, { "epoch": 0.5843471503848863, "grad_norm": 0.70703125, "learning_rate": 8.801887483833023e-05, "loss": 2.3827, "step": 1504 }, { "epoch": 0.5847356790753017, "grad_norm": 0.6640625, "learning_rate": 8.788415749805541e-05, "loss": 2.3867, "step": 1505 }, { "epoch": 0.5851242077657172, "grad_norm": 0.6796875, "learning_rate": 8.774946247043852e-05, "loss": 2.3742, "step": 1506 }, { "epoch": 0.5855127364561327, "grad_norm": 0.6953125, "learning_rate": 8.761479000353528e-05, "loss": 2.3596, "step": 1507 }, { "epoch": 0.5859012651465482, "grad_norm": 0.64453125, "learning_rate": 8.748014034535983e-05, "loss": 2.3165, "step": 1508 }, { "epoch": 0.5862897938369637, "grad_norm": 0.65234375, "learning_rate": 8.734551374388436e-05, "loss": 2.3892, "step": 1509 }, { "epoch": 0.5866783225273792, "grad_norm": 0.67578125, "learning_rate": 8.72109104470386e-05, "loss": 2.3335, "step": 1510 }, { "epoch": 0.5870668512177946, "grad_norm": 0.72265625, "learning_rate": 8.707633070270932e-05, "loss": 2.4179, "step": 1511 }, { "epoch": 0.5874553799082101, "grad_norm": 0.6796875, "learning_rate": 8.694177475873997e-05, "loss": 2.3839, "step": 1512 }, { "epoch": 0.5878439085986256, "grad_norm": 0.69140625, "learning_rate": 8.680724286293008e-05, "loss": 2.3786, "step": 1513 }, { "epoch": 0.5882324372890411, "grad_norm": 0.6796875, "learning_rate": 8.667273526303497e-05, "loss": 2.3887, "step": 1514 }, { "epoch": 0.5886209659794566, "grad_norm": 0.71484375, "learning_rate": 8.653825220676521e-05, "loss": 2.461, "step": 1515 }, { "epoch": 0.5890094946698721, "grad_norm": 0.69140625, "learning_rate": 8.640379394178617e-05, "loss": 2.3804, "step": 1516 }, { "epoch": 0.5893980233602875, "grad_norm": 0.66015625, "learning_rate": 8.62693607157175e-05, "loss": 2.3552, "step": 1517 }, { "epoch": 0.589786552050703, "grad_norm": 0.6875, "learning_rate": 8.613495277613281e-05, "loss": 2.3252, "step": 1518 }, { "epoch": 0.5901750807411185, "grad_norm": 0.6484375, "learning_rate": 8.600057037055914e-05, "loss": 2.3619, "step": 1519 }, { "epoch": 0.590563609431534, "grad_norm": 0.6953125, "learning_rate": 8.586621374647646e-05, "loss": 2.3739, "step": 1520 }, { "epoch": 0.5909521381219495, "grad_norm": 0.80078125, "learning_rate": 8.573188315131728e-05, "loss": 2.4229, "step": 1521 }, { "epoch": 0.591340666812365, "grad_norm": 0.6484375, "learning_rate": 8.559757883246617e-05, "loss": 2.3038, "step": 1522 }, { "epoch": 0.5917291955027804, "grad_norm": 0.68359375, "learning_rate": 8.546330103725937e-05, "loss": 2.3077, "step": 1523 }, { "epoch": 0.5921177241931959, "grad_norm": 0.64453125, "learning_rate": 8.53290500129842e-05, "loss": 2.3405, "step": 1524 }, { "epoch": 0.5925062528836114, "grad_norm": 0.68359375, "learning_rate": 8.51948260068787e-05, "loss": 2.3246, "step": 1525 }, { "epoch": 0.5928947815740269, "grad_norm": 0.6796875, "learning_rate": 8.506062926613112e-05, "loss": 2.4295, "step": 1526 }, { "epoch": 0.5932833102644424, "grad_norm": 0.63671875, "learning_rate": 8.492646003787958e-05, "loss": 2.3574, "step": 1527 }, { "epoch": 0.5936718389548579, "grad_norm": 0.69140625, "learning_rate": 8.479231856921148e-05, "loss": 2.4245, "step": 1528 }, { "epoch": 0.5940603676452733, "grad_norm": 0.671875, "learning_rate": 8.465820510716311e-05, "loss": 2.3591, "step": 1529 }, { "epoch": 0.5944488963356888, "grad_norm": 0.7109375, "learning_rate": 8.452411989871916e-05, "loss": 2.4629, "step": 1530 }, { "epoch": 0.5948374250261043, "grad_norm": 0.66015625, "learning_rate": 8.439006319081228e-05, "loss": 2.3193, "step": 1531 }, { "epoch": 0.5952259537165198, "grad_norm": 0.69921875, "learning_rate": 8.425603523032269e-05, "loss": 2.3394, "step": 1532 }, { "epoch": 0.5956144824069353, "grad_norm": 0.65234375, "learning_rate": 8.412203626407766e-05, "loss": 2.322, "step": 1533 }, { "epoch": 0.5960030110973508, "grad_norm": 0.6796875, "learning_rate": 8.398806653885098e-05, "loss": 2.3262, "step": 1534 }, { "epoch": 0.5963915397877662, "grad_norm": 0.69921875, "learning_rate": 8.385412630136267e-05, "loss": 2.3941, "step": 1535 }, { "epoch": 0.5967800684781817, "grad_norm": 0.73046875, "learning_rate": 8.372021579827844e-05, "loss": 2.3708, "step": 1536 }, { "epoch": 0.5971685971685972, "grad_norm": 0.7421875, "learning_rate": 8.358633527620923e-05, "loss": 2.3973, "step": 1537 }, { "epoch": 0.5975571258590127, "grad_norm": 15.1875, "learning_rate": 8.345248498171073e-05, "loss": 2.3555, "step": 1538 }, { "epoch": 0.5979456545494282, "grad_norm": 0.765625, "learning_rate": 8.331866516128302e-05, "loss": 2.3586, "step": 1539 }, { "epoch": 0.5983341832398437, "grad_norm": 0.74609375, "learning_rate": 8.318487606137001e-05, "loss": 2.3895, "step": 1540 }, { "epoch": 0.598722711930259, "grad_norm": 0.70703125, "learning_rate": 8.305111792835911e-05, "loss": 2.341, "step": 1541 }, { "epoch": 0.5991112406206746, "grad_norm": 0.63671875, "learning_rate": 8.291739100858058e-05, "loss": 2.3371, "step": 1542 }, { "epoch": 0.5994997693110901, "grad_norm": 0.65234375, "learning_rate": 8.278369554830729e-05, "loss": 2.3742, "step": 1543 }, { "epoch": 0.5998882980015056, "grad_norm": 0.6640625, "learning_rate": 8.265003179375418e-05, "loss": 2.394, "step": 1544 }, { "epoch": 0.6002768266919211, "grad_norm": 0.70703125, "learning_rate": 8.251639999107776e-05, "loss": 2.3719, "step": 1545 }, { "epoch": 0.6006653553823366, "grad_norm": 0.71875, "learning_rate": 8.238280038637566e-05, "loss": 2.3728, "step": 1546 }, { "epoch": 0.601053884072752, "grad_norm": 0.68359375, "learning_rate": 8.224923322568637e-05, "loss": 2.3766, "step": 1547 }, { "epoch": 0.6014424127631675, "grad_norm": 0.66015625, "learning_rate": 8.211569875498837e-05, "loss": 2.3564, "step": 1548 }, { "epoch": 0.601830941453583, "grad_norm": 0.6640625, "learning_rate": 8.198219722020019e-05, "loss": 2.3739, "step": 1549 }, { "epoch": 0.6022194701439985, "grad_norm": 0.71484375, "learning_rate": 8.184872886717956e-05, "loss": 2.4159, "step": 1550 }, { "epoch": 0.602607998834414, "grad_norm": 0.734375, "learning_rate": 8.17152939417232e-05, "loss": 2.3322, "step": 1551 }, { "epoch": 0.6029965275248295, "grad_norm": 0.69921875, "learning_rate": 8.158189268956619e-05, "loss": 2.3703, "step": 1552 }, { "epoch": 0.6033850562152449, "grad_norm": 0.65625, "learning_rate": 8.144852535638162e-05, "loss": 2.3311, "step": 1553 }, { "epoch": 0.6037735849056604, "grad_norm": 0.6875, "learning_rate": 8.131519218778008e-05, "loss": 2.3832, "step": 1554 }, { "epoch": 0.6041621135960759, "grad_norm": 0.640625, "learning_rate": 8.118189342930936e-05, "loss": 2.3851, "step": 1555 }, { "epoch": 0.6045506422864914, "grad_norm": 0.703125, "learning_rate": 8.104862932645374e-05, "loss": 2.3566, "step": 1556 }, { "epoch": 0.6049391709769069, "grad_norm": 0.671875, "learning_rate": 8.091540012463375e-05, "loss": 2.3132, "step": 1557 }, { "epoch": 0.6053276996673224, "grad_norm": 0.66015625, "learning_rate": 8.078220606920564e-05, "loss": 2.3965, "step": 1558 }, { "epoch": 0.6057162283577378, "grad_norm": 0.72265625, "learning_rate": 8.064904740546092e-05, "loss": 2.4171, "step": 1559 }, { "epoch": 0.6061047570481533, "grad_norm": 0.6875, "learning_rate": 8.051592437862597e-05, "loss": 2.3941, "step": 1560 }, { "epoch": 0.6064932857385688, "grad_norm": 0.69921875, "learning_rate": 8.038283723386144e-05, "loss": 2.4175, "step": 1561 }, { "epoch": 0.6068818144289843, "grad_norm": 0.6640625, "learning_rate": 8.024978621626199e-05, "loss": 2.3083, "step": 1562 }, { "epoch": 0.6072703431193998, "grad_norm": 0.70703125, "learning_rate": 8.011677157085571e-05, "loss": 2.3282, "step": 1563 }, { "epoch": 0.6076588718098153, "grad_norm": 0.68359375, "learning_rate": 7.998379354260375e-05, "loss": 2.3535, "step": 1564 }, { "epoch": 0.6080474005002307, "grad_norm": 0.71875, "learning_rate": 7.985085237639973e-05, "loss": 2.424, "step": 1565 }, { "epoch": 0.6084359291906462, "grad_norm": 0.734375, "learning_rate": 7.971794831706947e-05, "loss": 2.2892, "step": 1566 }, { "epoch": 0.6088244578810617, "grad_norm": 0.7578125, "learning_rate": 7.958508160937044e-05, "loss": 2.315, "step": 1567 }, { "epoch": 0.6092129865714772, "grad_norm": 0.7265625, "learning_rate": 7.94522524979913e-05, "loss": 2.3734, "step": 1568 }, { "epoch": 0.6096015152618927, "grad_norm": 0.6640625, "learning_rate": 7.931946122755149e-05, "loss": 2.333, "step": 1569 }, { "epoch": 0.6099900439523082, "grad_norm": 0.6640625, "learning_rate": 7.918670804260074e-05, "loss": 2.3811, "step": 1570 }, { "epoch": 0.6103785726427235, "grad_norm": 0.66796875, "learning_rate": 7.905399318761869e-05, "loss": 2.362, "step": 1571 }, { "epoch": 0.610767101333139, "grad_norm": 0.66015625, "learning_rate": 7.89213169070143e-05, "loss": 2.3697, "step": 1572 }, { "epoch": 0.6111556300235546, "grad_norm": 0.65234375, "learning_rate": 7.878867944512561e-05, "loss": 2.3693, "step": 1573 }, { "epoch": 0.61154415871397, "grad_norm": 0.671875, "learning_rate": 7.865608104621907e-05, "loss": 2.3939, "step": 1574 }, { "epoch": 0.6119326874043856, "grad_norm": 0.65234375, "learning_rate": 7.852352195448923e-05, "loss": 2.4086, "step": 1575 }, { "epoch": 0.612321216094801, "grad_norm": 0.69921875, "learning_rate": 7.839100241405828e-05, "loss": 2.3811, "step": 1576 }, { "epoch": 0.6127097447852164, "grad_norm": 0.6328125, "learning_rate": 7.825852266897553e-05, "loss": 2.2625, "step": 1577 }, { "epoch": 0.613098273475632, "grad_norm": 0.66796875, "learning_rate": 7.8126082963217e-05, "loss": 2.4848, "step": 1578 }, { "epoch": 0.6134868021660475, "grad_norm": 0.72265625, "learning_rate": 7.799368354068502e-05, "loss": 2.4191, "step": 1579 }, { "epoch": 0.613875330856463, "grad_norm": 0.6484375, "learning_rate": 7.786132464520768e-05, "loss": 2.3593, "step": 1580 }, { "epoch": 0.6142638595468785, "grad_norm": 0.67578125, "learning_rate": 7.772900652053848e-05, "loss": 2.3714, "step": 1581 }, { "epoch": 0.6146523882372938, "grad_norm": 0.6875, "learning_rate": 7.759672941035579e-05, "loss": 2.3637, "step": 1582 }, { "epoch": 0.6150409169277093, "grad_norm": 0.6328125, "learning_rate": 7.746449355826246e-05, "loss": 2.2919, "step": 1583 }, { "epoch": 0.6154294456181248, "grad_norm": 0.6796875, "learning_rate": 7.733229920778542e-05, "loss": 2.3922, "step": 1584 }, { "epoch": 0.6158179743085404, "grad_norm": 0.64453125, "learning_rate": 7.720014660237512e-05, "loss": 2.4041, "step": 1585 }, { "epoch": 0.6162065029989559, "grad_norm": 0.65625, "learning_rate": 7.706803598540506e-05, "loss": 2.3844, "step": 1586 }, { "epoch": 0.6165950316893714, "grad_norm": 0.671875, "learning_rate": 7.693596760017156e-05, "loss": 2.4035, "step": 1587 }, { "epoch": 0.6169835603797867, "grad_norm": 0.67578125, "learning_rate": 7.680394168989306e-05, "loss": 2.3288, "step": 1588 }, { "epoch": 0.6173720890702022, "grad_norm": 0.6796875, "learning_rate": 7.66719584977098e-05, "loss": 2.3953, "step": 1589 }, { "epoch": 0.6177606177606177, "grad_norm": 0.6640625, "learning_rate": 7.654001826668342e-05, "loss": 2.3341, "step": 1590 }, { "epoch": 0.6181491464510332, "grad_norm": 0.66796875, "learning_rate": 7.640812123979635e-05, "loss": 2.3844, "step": 1591 }, { "epoch": 0.6185376751414488, "grad_norm": 0.6328125, "learning_rate": 7.627626765995144e-05, "loss": 2.3106, "step": 1592 }, { "epoch": 0.6189262038318643, "grad_norm": 0.76171875, "learning_rate": 7.61444577699716e-05, "loss": 2.3895, "step": 1593 }, { "epoch": 0.6193147325222796, "grad_norm": 0.6796875, "learning_rate": 7.601269181259926e-05, "loss": 2.3381, "step": 1594 }, { "epoch": 0.6197032612126951, "grad_norm": 0.63671875, "learning_rate": 7.588097003049596e-05, "loss": 2.3945, "step": 1595 }, { "epoch": 0.6200917899031106, "grad_norm": 0.640625, "learning_rate": 7.574929266624185e-05, "loss": 2.3523, "step": 1596 }, { "epoch": 0.6204803185935261, "grad_norm": 0.66015625, "learning_rate": 7.561765996233527e-05, "loss": 2.3788, "step": 1597 }, { "epoch": 0.6208688472839416, "grad_norm": 0.6875, "learning_rate": 7.548607216119236e-05, "loss": 2.426, "step": 1598 }, { "epoch": 0.6212573759743572, "grad_norm": 0.625, "learning_rate": 7.535452950514655e-05, "loss": 2.2676, "step": 1599 }, { "epoch": 0.6216459046647725, "grad_norm": 0.75390625, "learning_rate": 7.522303223644814e-05, "loss": 2.3779, "step": 1600 }, { "epoch": 0.622034433355188, "grad_norm": 0.62890625, "learning_rate": 7.509158059726375e-05, "loss": 2.299, "step": 1601 }, { "epoch": 0.6224229620456035, "grad_norm": 0.68359375, "learning_rate": 7.496017482967613e-05, "loss": 2.3966, "step": 1602 }, { "epoch": 0.622811490736019, "grad_norm": 0.6796875, "learning_rate": 7.482881517568343e-05, "loss": 2.3278, "step": 1603 }, { "epoch": 0.6232000194264345, "grad_norm": 0.703125, "learning_rate": 7.469750187719895e-05, "loss": 2.3683, "step": 1604 }, { "epoch": 0.62358854811685, "grad_norm": 0.66796875, "learning_rate": 7.456623517605051e-05, "loss": 2.3648, "step": 1605 }, { "epoch": 0.6239770768072654, "grad_norm": 0.671875, "learning_rate": 7.443501531398027e-05, "loss": 2.3241, "step": 1606 }, { "epoch": 0.6243656054976809, "grad_norm": 0.640625, "learning_rate": 7.430384253264402e-05, "loss": 2.3535, "step": 1607 }, { "epoch": 0.6247541341880964, "grad_norm": 0.6796875, "learning_rate": 7.41727170736109e-05, "loss": 2.3948, "step": 1608 }, { "epoch": 0.625142662878512, "grad_norm": 0.62890625, "learning_rate": 7.404163917836284e-05, "loss": 2.3515, "step": 1609 }, { "epoch": 0.6255311915689274, "grad_norm": 0.67578125, "learning_rate": 7.391060908829426e-05, "loss": 2.3847, "step": 1610 }, { "epoch": 0.625919720259343, "grad_norm": 0.67578125, "learning_rate": 7.377962704471147e-05, "loss": 2.4048, "step": 1611 }, { "epoch": 0.6263082489497583, "grad_norm": 0.66796875, "learning_rate": 7.364869328883232e-05, "loss": 2.4245, "step": 1612 }, { "epoch": 0.6266967776401738, "grad_norm": 0.765625, "learning_rate": 7.351780806178577e-05, "loss": 2.3432, "step": 1613 }, { "epoch": 0.6270853063305893, "grad_norm": 0.63671875, "learning_rate": 7.338697160461135e-05, "loss": 2.425, "step": 1614 }, { "epoch": 0.6274738350210048, "grad_norm": 0.65234375, "learning_rate": 7.325618415825878e-05, "loss": 2.2868, "step": 1615 }, { "epoch": 0.6278623637114203, "grad_norm": 0.74609375, "learning_rate": 7.31254459635876e-05, "loss": 2.3369, "step": 1616 }, { "epoch": 0.6282508924018358, "grad_norm": 0.6640625, "learning_rate": 7.299475726136655e-05, "loss": 2.3966, "step": 1617 }, { "epoch": 0.6286394210922512, "grad_norm": 0.6328125, "learning_rate": 7.286411829227326e-05, "loss": 2.3717, "step": 1618 }, { "epoch": 0.6290279497826667, "grad_norm": 0.640625, "learning_rate": 7.27335292968938e-05, "loss": 2.3432, "step": 1619 }, { "epoch": 0.6294164784730822, "grad_norm": 0.66015625, "learning_rate": 7.260299051572216e-05, "loss": 2.3927, "step": 1620 }, { "epoch": 0.6298050071634977, "grad_norm": 0.71875, "learning_rate": 7.24725021891599e-05, "loss": 2.3983, "step": 1621 }, { "epoch": 0.6301935358539132, "grad_norm": 0.6484375, "learning_rate": 7.234206455751562e-05, "loss": 2.2967, "step": 1622 }, { "epoch": 0.6305820645443287, "grad_norm": 0.69140625, "learning_rate": 7.221167786100458e-05, "loss": 2.4191, "step": 1623 }, { "epoch": 0.6309705932347441, "grad_norm": 0.67578125, "learning_rate": 7.208134233974825e-05, "loss": 2.3658, "step": 1624 }, { "epoch": 0.6313591219251596, "grad_norm": 0.62890625, "learning_rate": 7.195105823377384e-05, "loss": 2.3469, "step": 1625 }, { "epoch": 0.6317476506155751, "grad_norm": 0.65234375, "learning_rate": 7.182082578301388e-05, "loss": 2.3892, "step": 1626 }, { "epoch": 0.6321361793059906, "grad_norm": 0.64453125, "learning_rate": 7.169064522730573e-05, "loss": 2.3862, "step": 1627 }, { "epoch": 0.6325247079964061, "grad_norm": 0.6171875, "learning_rate": 7.156051680639126e-05, "loss": 2.3799, "step": 1628 }, { "epoch": 0.6329132366868216, "grad_norm": 0.6796875, "learning_rate": 7.143044075991626e-05, "loss": 2.3772, "step": 1629 }, { "epoch": 0.633301765377237, "grad_norm": 0.65625, "learning_rate": 7.130041732743014e-05, "loss": 2.2959, "step": 1630 }, { "epoch": 0.6336902940676525, "grad_norm": 0.64453125, "learning_rate": 7.117044674838527e-05, "loss": 2.4059, "step": 1631 }, { "epoch": 0.634078822758068, "grad_norm": 0.64453125, "learning_rate": 7.104052926213687e-05, "loss": 2.3168, "step": 1632 }, { "epoch": 0.6344673514484835, "grad_norm": 0.62890625, "learning_rate": 7.091066510794224e-05, "loss": 2.3378, "step": 1633 }, { "epoch": 0.634855880138899, "grad_norm": 0.671875, "learning_rate": 7.07808545249606e-05, "loss": 2.3458, "step": 1634 }, { "epoch": 0.6352444088293145, "grad_norm": 0.6328125, "learning_rate": 7.065109775225231e-05, "loss": 2.3979, "step": 1635 }, { "epoch": 0.6356329375197299, "grad_norm": 0.6484375, "learning_rate": 7.052139502877886e-05, "loss": 2.3766, "step": 1636 }, { "epoch": 0.6360214662101454, "grad_norm": 0.65625, "learning_rate": 7.039174659340202e-05, "loss": 2.3896, "step": 1637 }, { "epoch": 0.6364099949005609, "grad_norm": 0.6796875, "learning_rate": 7.026215268488367e-05, "loss": 2.3584, "step": 1638 }, { "epoch": 0.6367985235909764, "grad_norm": 0.640625, "learning_rate": 7.013261354188529e-05, "loss": 2.3304, "step": 1639 }, { "epoch": 0.6371870522813919, "grad_norm": 0.6640625, "learning_rate": 7.00031294029675e-05, "loss": 2.3706, "step": 1640 }, { "epoch": 0.6375755809718073, "grad_norm": 0.65234375, "learning_rate": 6.98737005065895e-05, "loss": 2.3809, "step": 1641 }, { "epoch": 0.6379641096622228, "grad_norm": 0.62890625, "learning_rate": 6.974432709110895e-05, "loss": 2.337, "step": 1642 }, { "epoch": 0.6383526383526383, "grad_norm": 0.6171875, "learning_rate": 6.961500939478118e-05, "loss": 2.4023, "step": 1643 }, { "epoch": 0.6387411670430538, "grad_norm": 0.67578125, "learning_rate": 6.948574765575902e-05, "loss": 2.3571, "step": 1644 }, { "epoch": 0.6391296957334693, "grad_norm": 0.6328125, "learning_rate": 6.935654211209213e-05, "loss": 2.3675, "step": 1645 }, { "epoch": 0.6395182244238848, "grad_norm": 0.6484375, "learning_rate": 6.922739300172678e-05, "loss": 2.3294, "step": 1646 }, { "epoch": 0.6399067531143002, "grad_norm": 0.65625, "learning_rate": 6.909830056250527e-05, "loss": 2.3399, "step": 1647 }, { "epoch": 0.6402952818047157, "grad_norm": 0.66015625, "learning_rate": 6.896926503216556e-05, "loss": 2.4404, "step": 1648 }, { "epoch": 0.6406838104951312, "grad_norm": 0.671875, "learning_rate": 6.884028664834075e-05, "loss": 2.4031, "step": 1649 }, { "epoch": 0.6410723391855467, "grad_norm": 0.66015625, "learning_rate": 6.871136564855875e-05, "loss": 2.3244, "step": 1650 }, { "epoch": 0.6414608678759622, "grad_norm": 0.66796875, "learning_rate": 6.858250227024178e-05, "loss": 2.2752, "step": 1651 }, { "epoch": 0.6418493965663777, "grad_norm": 0.64453125, "learning_rate": 6.845369675070594e-05, "loss": 2.3314, "step": 1652 }, { "epoch": 0.6422379252567931, "grad_norm": 0.62890625, "learning_rate": 6.832494932716078e-05, "loss": 2.3836, "step": 1653 }, { "epoch": 0.6426264539472086, "grad_norm": 0.64453125, "learning_rate": 6.819626023670882e-05, "loss": 2.2433, "step": 1654 }, { "epoch": 0.6430149826376241, "grad_norm": 0.64453125, "learning_rate": 6.806762971634524e-05, "loss": 2.4247, "step": 1655 }, { "epoch": 0.6434035113280396, "grad_norm": 0.66015625, "learning_rate": 6.793905800295726e-05, "loss": 2.3075, "step": 1656 }, { "epoch": 0.6437920400184551, "grad_norm": 0.6171875, "learning_rate": 6.78105453333239e-05, "loss": 2.3406, "step": 1657 }, { "epoch": 0.6441805687088706, "grad_norm": 0.66796875, "learning_rate": 6.768209194411532e-05, "loss": 2.3682, "step": 1658 }, { "epoch": 0.644569097399286, "grad_norm": 0.71875, "learning_rate": 6.755369807189263e-05, "loss": 2.3562, "step": 1659 }, { "epoch": 0.6449576260897015, "grad_norm": 0.70703125, "learning_rate": 6.742536395310726e-05, "loss": 2.3898, "step": 1660 }, { "epoch": 0.645346154780117, "grad_norm": 0.625, "learning_rate": 6.729708982410066e-05, "loss": 2.4003, "step": 1661 }, { "epoch": 0.6457346834705325, "grad_norm": 0.640625, "learning_rate": 6.71688759211037e-05, "loss": 2.3614, "step": 1662 }, { "epoch": 0.646123212160948, "grad_norm": 0.65234375, "learning_rate": 6.704072248023643e-05, "loss": 2.3815, "step": 1663 }, { "epoch": 0.6465117408513635, "grad_norm": 0.66796875, "learning_rate": 6.691262973750751e-05, "loss": 2.3747, "step": 1664 }, { "epoch": 0.6469002695417789, "grad_norm": 0.67578125, "learning_rate": 6.678459792881387e-05, "loss": 2.3865, "step": 1665 }, { "epoch": 0.6472887982321944, "grad_norm": 0.6640625, "learning_rate": 6.665662728994013e-05, "loss": 2.2916, "step": 1666 }, { "epoch": 0.6476773269226099, "grad_norm": 0.65625, "learning_rate": 6.652871805655832e-05, "loss": 2.285, "step": 1667 }, { "epoch": 0.6480658556130254, "grad_norm": 0.62890625, "learning_rate": 6.640087046422738e-05, "loss": 2.3718, "step": 1668 }, { "epoch": 0.6484543843034409, "grad_norm": 0.66796875, "learning_rate": 6.627308474839277e-05, "loss": 2.3982, "step": 1669 }, { "epoch": 0.6488429129938564, "grad_norm": 0.65234375, "learning_rate": 6.614536114438589e-05, "loss": 2.4418, "step": 1670 }, { "epoch": 0.6492314416842718, "grad_norm": 0.66796875, "learning_rate": 6.601769988742386e-05, "loss": 2.3537, "step": 1671 }, { "epoch": 0.6496199703746873, "grad_norm": 0.6796875, "learning_rate": 6.58901012126089e-05, "loss": 2.3693, "step": 1672 }, { "epoch": 0.6500084990651028, "grad_norm": 0.640625, "learning_rate": 6.576256535492806e-05, "loss": 2.3887, "step": 1673 }, { "epoch": 0.6503970277555183, "grad_norm": 0.67578125, "learning_rate": 6.563509254925268e-05, "loss": 2.3907, "step": 1674 }, { "epoch": 0.6507855564459338, "grad_norm": 0.65234375, "learning_rate": 6.550768303033789e-05, "loss": 2.398, "step": 1675 }, { "epoch": 0.6511740851363493, "grad_norm": 0.71875, "learning_rate": 6.538033703282243e-05, "loss": 2.3144, "step": 1676 }, { "epoch": 0.6515626138267647, "grad_norm": 0.6640625, "learning_rate": 6.52530547912279e-05, "loss": 2.3954, "step": 1677 }, { "epoch": 0.6519511425171802, "grad_norm": 0.68359375, "learning_rate": 6.512583653995867e-05, "loss": 2.4037, "step": 1678 }, { "epoch": 0.6523396712075957, "grad_norm": 0.73828125, "learning_rate": 6.499868251330102e-05, "loss": 2.3779, "step": 1679 }, { "epoch": 0.6527281998980112, "grad_norm": 0.65234375, "learning_rate": 6.48715929454232e-05, "loss": 2.3817, "step": 1680 }, { "epoch": 0.6531167285884267, "grad_norm": 0.66796875, "learning_rate": 6.474456807037456e-05, "loss": 2.3627, "step": 1681 }, { "epoch": 0.6535052572788422, "grad_norm": 0.6640625, "learning_rate": 6.461760812208544e-05, "loss": 2.3574, "step": 1682 }, { "epoch": 0.6538937859692576, "grad_norm": 0.6796875, "learning_rate": 6.449071333436654e-05, "loss": 2.3536, "step": 1683 }, { "epoch": 0.6542823146596731, "grad_norm": 0.6640625, "learning_rate": 6.436388394090862e-05, "loss": 2.3958, "step": 1684 }, { "epoch": 0.6546708433500886, "grad_norm": 0.640625, "learning_rate": 6.423712017528193e-05, "loss": 2.3059, "step": 1685 }, { "epoch": 0.6550593720405041, "grad_norm": 0.66015625, "learning_rate": 6.411042227093592e-05, "loss": 2.4365, "step": 1686 }, { "epoch": 0.6554479007309196, "grad_norm": 0.67578125, "learning_rate": 6.398379046119873e-05, "loss": 2.3754, "step": 1687 }, { "epoch": 0.6558364294213351, "grad_norm": 0.65625, "learning_rate": 6.38572249792768e-05, "loss": 2.3531, "step": 1688 }, { "epoch": 0.6562249581117505, "grad_norm": 0.6171875, "learning_rate": 6.373072605825435e-05, "loss": 2.3918, "step": 1689 }, { "epoch": 0.656613486802166, "grad_norm": 0.62109375, "learning_rate": 6.360429393109311e-05, "loss": 2.2982, "step": 1690 }, { "epoch": 0.6570020154925815, "grad_norm": 0.70703125, "learning_rate": 6.347792883063173e-05, "loss": 2.4119, "step": 1691 }, { "epoch": 0.657390544182997, "grad_norm": 0.65625, "learning_rate": 6.335163098958552e-05, "loss": 2.3786, "step": 1692 }, { "epoch": 0.6577790728734125, "grad_norm": 0.703125, "learning_rate": 6.322540064054578e-05, "loss": 2.3941, "step": 1693 }, { "epoch": 0.658167601563828, "grad_norm": 0.68359375, "learning_rate": 6.309923801597965e-05, "loss": 2.4113, "step": 1694 }, { "epoch": 0.6585561302542434, "grad_norm": 0.6328125, "learning_rate": 6.297314334822945e-05, "loss": 2.3399, "step": 1695 }, { "epoch": 0.6589446589446589, "grad_norm": 0.62890625, "learning_rate": 6.284711686951243e-05, "loss": 2.385, "step": 1696 }, { "epoch": 0.6593331876350744, "grad_norm": 0.72265625, "learning_rate": 6.272115881192025e-05, "loss": 2.3508, "step": 1697 }, { "epoch": 0.6597217163254899, "grad_norm": 0.74609375, "learning_rate": 6.259526940741847e-05, "loss": 2.3601, "step": 1698 }, { "epoch": 0.6601102450159054, "grad_norm": 0.734375, "learning_rate": 6.246944888784634e-05, "loss": 2.3568, "step": 1699 }, { "epoch": 0.6604987737063209, "grad_norm": 0.63671875, "learning_rate": 6.234369748491617e-05, "loss": 2.3455, "step": 1700 }, { "epoch": 0.6608873023967363, "grad_norm": 0.60546875, "learning_rate": 6.221801543021306e-05, "loss": 2.2836, "step": 1701 }, { "epoch": 0.6612758310871518, "grad_norm": 0.671875, "learning_rate": 6.209240295519428e-05, "loss": 2.4369, "step": 1702 }, { "epoch": 0.6616643597775673, "grad_norm": 0.734375, "learning_rate": 6.196686029118909e-05, "loss": 2.3962, "step": 1703 }, { "epoch": 0.6620528884679828, "grad_norm": 0.6640625, "learning_rate": 6.184138766939811e-05, "loss": 2.3827, "step": 1704 }, { "epoch": 0.6624414171583983, "grad_norm": 0.66015625, "learning_rate": 6.171598532089299e-05, "loss": 2.2286, "step": 1705 }, { "epoch": 0.6628299458488138, "grad_norm": 0.6875, "learning_rate": 6.159065347661596e-05, "loss": 2.3438, "step": 1706 }, { "epoch": 0.6632184745392292, "grad_norm": 0.6484375, "learning_rate": 6.14653923673794e-05, "loss": 2.4297, "step": 1707 }, { "epoch": 0.6636070032296447, "grad_norm": 0.61328125, "learning_rate": 6.134020222386544e-05, "loss": 2.3676, "step": 1708 }, { "epoch": 0.6639955319200602, "grad_norm": 0.65625, "learning_rate": 6.121508327662553e-05, "loss": 2.3893, "step": 1709 }, { "epoch": 0.6643840606104757, "grad_norm": 0.6640625, "learning_rate": 6.109003575607997e-05, "loss": 2.4231, "step": 1710 }, { "epoch": 0.6647725893008912, "grad_norm": 0.65234375, "learning_rate": 6.096505989251753e-05, "loss": 2.3627, "step": 1711 }, { "epoch": 0.6651611179913066, "grad_norm": 0.6484375, "learning_rate": 6.0840155916095044e-05, "loss": 2.3278, "step": 1712 }, { "epoch": 0.6655496466817221, "grad_norm": 0.61328125, "learning_rate": 6.071532405683691e-05, "loss": 2.3325, "step": 1713 }, { "epoch": 0.6659381753721376, "grad_norm": 0.6640625, "learning_rate": 6.059056454463479e-05, "loss": 2.3673, "step": 1714 }, { "epoch": 0.6663267040625531, "grad_norm": 0.625, "learning_rate": 6.046587760924698e-05, "loss": 2.308, "step": 1715 }, { "epoch": 0.6667152327529686, "grad_norm": 0.6328125, "learning_rate": 6.034126348029827e-05, "loss": 2.3354, "step": 1716 }, { "epoch": 0.6671037614433841, "grad_norm": 0.62890625, "learning_rate": 6.021672238727927e-05, "loss": 2.3802, "step": 1717 }, { "epoch": 0.6674922901337995, "grad_norm": 0.671875, "learning_rate": 6.009225455954614e-05, "loss": 2.3173, "step": 1718 }, { "epoch": 0.667880818824215, "grad_norm": 0.6484375, "learning_rate": 5.996786022632004e-05, "loss": 2.417, "step": 1719 }, { "epoch": 0.6682693475146305, "grad_norm": 0.65234375, "learning_rate": 5.9843539616686875e-05, "loss": 2.3546, "step": 1720 }, { "epoch": 0.668657876205046, "grad_norm": 0.67578125, "learning_rate": 5.971929295959671e-05, "loss": 2.3215, "step": 1721 }, { "epoch": 0.6690464048954615, "grad_norm": 0.6328125, "learning_rate": 5.959512048386349e-05, "loss": 2.3583, "step": 1722 }, { "epoch": 0.669434933585877, "grad_norm": 0.65625, "learning_rate": 5.9471022418164425e-05, "loss": 2.4115, "step": 1723 }, { "epoch": 0.6698234622762924, "grad_norm": 0.65234375, "learning_rate": 5.934699899103982e-05, "loss": 2.4339, "step": 1724 }, { "epoch": 0.6702119909667079, "grad_norm": 0.63671875, "learning_rate": 5.9223050430892445e-05, "loss": 2.3634, "step": 1725 }, { "epoch": 0.6706005196571234, "grad_norm": 0.69921875, "learning_rate": 5.909917696598721e-05, "loss": 2.373, "step": 1726 }, { "epoch": 0.6709890483475389, "grad_norm": 0.671875, "learning_rate": 5.897537882445079e-05, "loss": 2.4134, "step": 1727 }, { "epoch": 0.6713775770379544, "grad_norm": 0.62109375, "learning_rate": 5.8851656234271055e-05, "loss": 2.3665, "step": 1728 }, { "epoch": 0.6717661057283699, "grad_norm": 0.6484375, "learning_rate": 5.872800942329677e-05, "loss": 2.3632, "step": 1729 }, { "epoch": 0.6721546344187853, "grad_norm": 0.67578125, "learning_rate": 5.860443861923718e-05, "loss": 2.3408, "step": 1730 }, { "epoch": 0.6725431631092008, "grad_norm": 0.625, "learning_rate": 5.848094404966149e-05, "loss": 2.3687, "step": 1731 }, { "epoch": 0.6729316917996163, "grad_norm": 0.69140625, "learning_rate": 5.83575259419986e-05, "loss": 2.4013, "step": 1732 }, { "epoch": 0.6733202204900318, "grad_norm": 0.671875, "learning_rate": 5.82341845235365e-05, "loss": 2.3826, "step": 1733 }, { "epoch": 0.6737087491804473, "grad_norm": 0.64453125, "learning_rate": 5.811092002142198e-05, "loss": 2.4015, "step": 1734 }, { "epoch": 0.6740972778708628, "grad_norm": 0.62890625, "learning_rate": 5.798773266266022e-05, "loss": 2.3255, "step": 1735 }, { "epoch": 0.6744858065612782, "grad_norm": 0.625, "learning_rate": 5.786462267411431e-05, "loss": 2.3693, "step": 1736 }, { "epoch": 0.6748743352516937, "grad_norm": 0.6484375, "learning_rate": 5.774159028250483e-05, "loss": 2.407, "step": 1737 }, { "epoch": 0.6752628639421092, "grad_norm": 0.71875, "learning_rate": 5.761863571440949e-05, "loss": 2.4144, "step": 1738 }, { "epoch": 0.6756513926325247, "grad_norm": 0.66015625, "learning_rate": 5.749575919626269e-05, "loss": 2.3344, "step": 1739 }, { "epoch": 0.6760399213229402, "grad_norm": 0.65625, "learning_rate": 5.737296095435498e-05, "loss": 2.3339, "step": 1740 }, { "epoch": 0.6764284500133557, "grad_norm": 0.7109375, "learning_rate": 5.7250241214832914e-05, "loss": 2.3194, "step": 1741 }, { "epoch": 0.6768169787037711, "grad_norm": 0.6796875, "learning_rate": 5.7127600203698385e-05, "loss": 2.3024, "step": 1742 }, { "epoch": 0.6772055073941866, "grad_norm": 0.6796875, "learning_rate": 5.700503814680831e-05, "loss": 2.3236, "step": 1743 }, { "epoch": 0.6775940360846021, "grad_norm": 0.609375, "learning_rate": 5.6882555269874224e-05, "loss": 2.3919, "step": 1744 }, { "epoch": 0.6779825647750176, "grad_norm": 0.6328125, "learning_rate": 5.676015179846181e-05, "loss": 2.3636, "step": 1745 }, { "epoch": 0.6783710934654331, "grad_norm": 0.6484375, "learning_rate": 5.663782795799057e-05, "loss": 2.397, "step": 1746 }, { "epoch": 0.6787596221558486, "grad_norm": 0.6953125, "learning_rate": 5.651558397373335e-05, "loss": 2.3954, "step": 1747 }, { "epoch": 0.679148150846264, "grad_norm": 0.62890625, "learning_rate": 5.6393420070815806e-05, "loss": 2.3431, "step": 1748 }, { "epoch": 0.6795366795366795, "grad_norm": 0.65625, "learning_rate": 5.627133647421628e-05, "loss": 2.3616, "step": 1749 }, { "epoch": 0.679925208227095, "grad_norm": 0.640625, "learning_rate": 5.614933340876515e-05, "loss": 2.4101, "step": 1750 }, { "epoch": 0.6803137369175105, "grad_norm": 0.64453125, "learning_rate": 5.602741109914449e-05, "loss": 2.3575, "step": 1751 }, { "epoch": 0.680702265607926, "grad_norm": 0.6640625, "learning_rate": 5.590556976988766e-05, "loss": 2.396, "step": 1752 }, { "epoch": 0.6810907942983415, "grad_norm": 0.671875, "learning_rate": 5.578380964537888e-05, "loss": 2.3252, "step": 1753 }, { "epoch": 0.6814793229887569, "grad_norm": 0.609375, "learning_rate": 5.566213094985282e-05, "loss": 2.3266, "step": 1754 }, { "epoch": 0.6818678516791724, "grad_norm": 0.6484375, "learning_rate": 5.55405339073942e-05, "loss": 2.3403, "step": 1755 }, { "epoch": 0.6822563803695879, "grad_norm": 0.6484375, "learning_rate": 5.5419018741937424e-05, "loss": 2.3728, "step": 1756 }, { "epoch": 0.6826449090600034, "grad_norm": 0.67578125, "learning_rate": 5.529758567726593e-05, "loss": 2.3492, "step": 1757 }, { "epoch": 0.6830334377504189, "grad_norm": 0.6953125, "learning_rate": 5.517623493701213e-05, "loss": 2.3451, "step": 1758 }, { "epoch": 0.6834219664408344, "grad_norm": 0.6328125, "learning_rate": 5.505496674465678e-05, "loss": 2.3361, "step": 1759 }, { "epoch": 0.6838104951312498, "grad_norm": 0.62890625, "learning_rate": 5.49337813235286e-05, "loss": 2.3548, "step": 1760 }, { "epoch": 0.6841990238216653, "grad_norm": 0.63671875, "learning_rate": 5.481267889680388e-05, "loss": 2.3969, "step": 1761 }, { "epoch": 0.6845875525120808, "grad_norm": 0.64453125, "learning_rate": 5.4691659687506094e-05, "loss": 2.3452, "step": 1762 }, { "epoch": 0.6849760812024963, "grad_norm": 0.69921875, "learning_rate": 5.457072391850543e-05, "loss": 2.3758, "step": 1763 }, { "epoch": 0.6853646098929118, "grad_norm": 0.65234375, "learning_rate": 5.444987181251845e-05, "loss": 2.4076, "step": 1764 }, { "epoch": 0.6857531385833273, "grad_norm": 0.66796875, "learning_rate": 5.432910359210754e-05, "loss": 2.3528, "step": 1765 }, { "epoch": 0.6861416672737427, "grad_norm": 0.62890625, "learning_rate": 5.420841947968076e-05, "loss": 2.3899, "step": 1766 }, { "epoch": 0.6865301959641582, "grad_norm": 0.62109375, "learning_rate": 5.4087819697491116e-05, "loss": 2.3693, "step": 1767 }, { "epoch": 0.6869187246545737, "grad_norm": 0.6484375, "learning_rate": 5.3967304467636406e-05, "loss": 2.3826, "step": 1768 }, { "epoch": 0.6873072533449892, "grad_norm": 0.640625, "learning_rate": 5.38468740120587e-05, "loss": 2.3899, "step": 1769 }, { "epoch": 0.6876957820354047, "grad_norm": 0.609375, "learning_rate": 5.372652855254394e-05, "loss": 2.3605, "step": 1770 }, { "epoch": 0.6880843107258202, "grad_norm": 0.640625, "learning_rate": 5.360626831072156e-05, "loss": 2.4103, "step": 1771 }, { "epoch": 0.6884728394162356, "grad_norm": 0.671875, "learning_rate": 5.348609350806402e-05, "loss": 2.3197, "step": 1772 }, { "epoch": 0.6888613681066511, "grad_norm": 0.62109375, "learning_rate": 5.3366004365886433e-05, "loss": 2.3234, "step": 1773 }, { "epoch": 0.6892498967970666, "grad_norm": 0.66015625, "learning_rate": 5.3246001105346254e-05, "loss": 2.3023, "step": 1774 }, { "epoch": 0.6896384254874821, "grad_norm": 0.68359375, "learning_rate": 5.3126083947442584e-05, "loss": 2.4167, "step": 1775 }, { "epoch": 0.6900269541778976, "grad_norm": 0.61328125, "learning_rate": 5.300625311301615e-05, "loss": 2.3408, "step": 1776 }, { "epoch": 0.690415482868313, "grad_norm": 0.62890625, "learning_rate": 5.2886508822748585e-05, "loss": 2.347, "step": 1777 }, { "epoch": 0.6908040115587285, "grad_norm": 0.60546875, "learning_rate": 5.27668512971622e-05, "loss": 2.3668, "step": 1778 }, { "epoch": 0.691192540249144, "grad_norm": 0.625, "learning_rate": 5.264728075661951e-05, "loss": 2.3325, "step": 1779 }, { "epoch": 0.6915810689395595, "grad_norm": 0.625, "learning_rate": 5.2527797421322835e-05, "loss": 2.3634, "step": 1780 }, { "epoch": 0.691969597629975, "grad_norm": 0.6953125, "learning_rate": 5.240840151131388e-05, "loss": 2.3602, "step": 1781 }, { "epoch": 0.6923581263203905, "grad_norm": 0.64453125, "learning_rate": 5.2289093246473355e-05, "loss": 2.425, "step": 1782 }, { "epoch": 0.6927466550108059, "grad_norm": 0.6328125, "learning_rate": 5.216987284652061e-05, "loss": 2.3902, "step": 1783 }, { "epoch": 0.6931351837012214, "grad_norm": 0.66015625, "learning_rate": 5.205074053101306e-05, "loss": 2.3617, "step": 1784 }, { "epoch": 0.6935237123916369, "grad_norm": 0.65234375, "learning_rate": 5.193169651934603e-05, "loss": 2.3791, "step": 1785 }, { "epoch": 0.6939122410820524, "grad_norm": 0.6328125, "learning_rate": 5.181274103075215e-05, "loss": 2.3141, "step": 1786 }, { "epoch": 0.6943007697724679, "grad_norm": 0.61328125, "learning_rate": 5.169387428430108e-05, "loss": 2.3242, "step": 1787 }, { "epoch": 0.6946892984628834, "grad_norm": 0.6796875, "learning_rate": 5.1575096498899e-05, "loss": 2.3407, "step": 1788 }, { "epoch": 0.6950778271532988, "grad_norm": 0.625, "learning_rate": 5.1456407893288275e-05, "loss": 2.3777, "step": 1789 }, { "epoch": 0.6954663558437143, "grad_norm": 0.64453125, "learning_rate": 5.133780868604705e-05, "loss": 2.3728, "step": 1790 }, { "epoch": 0.6958548845341298, "grad_norm": 0.6484375, "learning_rate": 5.121929909558887e-05, "loss": 2.3792, "step": 1791 }, { "epoch": 0.6962434132245453, "grad_norm": 0.625, "learning_rate": 5.110087934016209e-05, "loss": 2.3423, "step": 1792 }, { "epoch": 0.6966319419149608, "grad_norm": 0.625, "learning_rate": 5.098254963784979e-05, "loss": 2.3595, "step": 1793 }, { "epoch": 0.6970204706053763, "grad_norm": 0.6328125, "learning_rate": 5.0864310206569144e-05, "loss": 2.3816, "step": 1794 }, { "epoch": 0.6974089992957917, "grad_norm": 0.671875, "learning_rate": 5.074616126407106e-05, "loss": 2.4156, "step": 1795 }, { "epoch": 0.6977975279862072, "grad_norm": 0.671875, "learning_rate": 5.0628103027939855e-05, "loss": 2.3937, "step": 1796 }, { "epoch": 0.6981860566766227, "grad_norm": 0.65625, "learning_rate": 5.0510135715592745e-05, "loss": 2.388, "step": 1797 }, { "epoch": 0.6985745853670382, "grad_norm": 0.66796875, "learning_rate": 5.039225954427953e-05, "loss": 2.3512, "step": 1798 }, { "epoch": 0.6989631140574537, "grad_norm": 0.640625, "learning_rate": 5.027447473108218e-05, "loss": 2.4035, "step": 1799 }, { "epoch": 0.6993516427478692, "grad_norm": 0.69921875, "learning_rate": 5.015678149291439e-05, "loss": 2.3672, "step": 1800 }, { "epoch": 0.6997401714382846, "grad_norm": 0.61328125, "learning_rate": 5.003918004652117e-05, "loss": 2.3271, "step": 1801 }, { "epoch": 0.7001287001287001, "grad_norm": 0.60546875, "learning_rate": 4.992167060847858e-05, "loss": 2.3346, "step": 1802 }, { "epoch": 0.7005172288191156, "grad_norm": 0.625, "learning_rate": 4.9804253395193154e-05, "loss": 2.2578, "step": 1803 }, { "epoch": 0.7009057575095311, "grad_norm": 0.640625, "learning_rate": 4.968692862290165e-05, "loss": 2.3872, "step": 1804 }, { "epoch": 0.7012942861999466, "grad_norm": 0.65625, "learning_rate": 4.9569696507670535e-05, "loss": 2.3211, "step": 1805 }, { "epoch": 0.7016828148903621, "grad_norm": 0.63671875, "learning_rate": 4.945255726539566e-05, "loss": 2.3605, "step": 1806 }, { "epoch": 0.7020713435807775, "grad_norm": 0.66015625, "learning_rate": 4.9335511111801836e-05, "loss": 2.4147, "step": 1807 }, { "epoch": 0.702459872271193, "grad_norm": 0.66015625, "learning_rate": 4.921855826244248e-05, "loss": 2.3586, "step": 1808 }, { "epoch": 0.7028484009616085, "grad_norm": 0.60546875, "learning_rate": 4.910169893269908e-05, "loss": 2.279, "step": 1809 }, { "epoch": 0.703236929652024, "grad_norm": 0.609375, "learning_rate": 4.8984933337781e-05, "loss": 2.3442, "step": 1810 }, { "epoch": 0.7036254583424395, "grad_norm": 0.7109375, "learning_rate": 4.886826169272489e-05, "loss": 2.3817, "step": 1811 }, { "epoch": 0.704013987032855, "grad_norm": 0.62109375, "learning_rate": 4.8751684212394466e-05, "loss": 2.3372, "step": 1812 }, { "epoch": 0.7044025157232704, "grad_norm": 0.640625, "learning_rate": 4.8635201111479986e-05, "loss": 2.403, "step": 1813 }, { "epoch": 0.7047910444136859, "grad_norm": 0.62890625, "learning_rate": 4.851881260449791e-05, "loss": 2.3849, "step": 1814 }, { "epoch": 0.7051795731041014, "grad_norm": 0.640625, "learning_rate": 4.840251890579047e-05, "loss": 2.3893, "step": 1815 }, { "epoch": 0.7055681017945169, "grad_norm": 0.671875, "learning_rate": 4.828632022952535e-05, "loss": 2.3303, "step": 1816 }, { "epoch": 0.7059566304849324, "grad_norm": 0.6328125, "learning_rate": 4.817021678969518e-05, "loss": 2.3204, "step": 1817 }, { "epoch": 0.7063451591753479, "grad_norm": 0.61328125, "learning_rate": 4.805420880011723e-05, "loss": 2.3551, "step": 1818 }, { "epoch": 0.7067336878657633, "grad_norm": 0.6484375, "learning_rate": 4.793829647443302e-05, "loss": 2.3654, "step": 1819 }, { "epoch": 0.7071222165561788, "grad_norm": 0.6328125, "learning_rate": 4.782248002610781e-05, "loss": 2.2714, "step": 1820 }, { "epoch": 0.7075107452465943, "grad_norm": 0.640625, "learning_rate": 4.770675966843035e-05, "loss": 2.3196, "step": 1821 }, { "epoch": 0.7078992739370098, "grad_norm": 0.625, "learning_rate": 4.7591135614512417e-05, "loss": 2.4134, "step": 1822 }, { "epoch": 0.7082878026274253, "grad_norm": 0.62890625, "learning_rate": 4.747560807728847e-05, "loss": 2.3107, "step": 1823 }, { "epoch": 0.7086763313178408, "grad_norm": 0.61328125, "learning_rate": 4.736017726951515e-05, "loss": 2.3464, "step": 1824 }, { "epoch": 0.7090648600082562, "grad_norm": 0.64453125, "learning_rate": 4.724484340377102e-05, "loss": 2.3962, "step": 1825 }, { "epoch": 0.7094533886986717, "grad_norm": 0.6640625, "learning_rate": 4.7129606692456095e-05, "loss": 2.411, "step": 1826 }, { "epoch": 0.7098419173890872, "grad_norm": 0.64453125, "learning_rate": 4.701446734779149e-05, "loss": 2.3093, "step": 1827 }, { "epoch": 0.7102304460795027, "grad_norm": 0.6328125, "learning_rate": 4.6899425581818924e-05, "loss": 2.3815, "step": 1828 }, { "epoch": 0.7106189747699182, "grad_norm": 0.6875, "learning_rate": 4.6784481606400496e-05, "loss": 2.4293, "step": 1829 }, { "epoch": 0.7110075034603337, "grad_norm": 0.65234375, "learning_rate": 4.66696356332182e-05, "loss": 2.3517, "step": 1830 }, { "epoch": 0.7113960321507491, "grad_norm": 0.67578125, "learning_rate": 4.655488787377354e-05, "loss": 2.4058, "step": 1831 }, { "epoch": 0.7117845608411646, "grad_norm": 0.6171875, "learning_rate": 4.644023853938716e-05, "loss": 2.2576, "step": 1832 }, { "epoch": 0.7121730895315801, "grad_norm": 0.6015625, "learning_rate": 4.6325687841198415e-05, "loss": 2.2638, "step": 1833 }, { "epoch": 0.7125616182219956, "grad_norm": 0.65234375, "learning_rate": 4.621123599016505e-05, "loss": 2.2876, "step": 1834 }, { "epoch": 0.7129501469124111, "grad_norm": 0.640625, "learning_rate": 4.609688319706271e-05, "loss": 2.316, "step": 1835 }, { "epoch": 0.7133386756028266, "grad_norm": 0.66015625, "learning_rate": 4.5982629672484734e-05, "loss": 2.3727, "step": 1836 }, { "epoch": 0.713727204293242, "grad_norm": 0.66015625, "learning_rate": 4.586847562684147e-05, "loss": 2.3608, "step": 1837 }, { "epoch": 0.7141157329836575, "grad_norm": 0.63671875, "learning_rate": 4.575442127036019e-05, "loss": 2.2822, "step": 1838 }, { "epoch": 0.714504261674073, "grad_norm": 0.6328125, "learning_rate": 4.564046681308457e-05, "loss": 2.3288, "step": 1839 }, { "epoch": 0.7148927903644885, "grad_norm": 0.73046875, "learning_rate": 4.5526612464874285e-05, "loss": 2.3741, "step": 1840 }, { "epoch": 0.715281319054904, "grad_norm": 0.6328125, "learning_rate": 4.541285843540465e-05, "loss": 2.2713, "step": 1841 }, { "epoch": 0.7156698477453195, "grad_norm": 0.61328125, "learning_rate": 4.5299204934166216e-05, "loss": 2.3915, "step": 1842 }, { "epoch": 0.7160583764357349, "grad_norm": 0.64453125, "learning_rate": 4.518565217046445e-05, "loss": 2.3431, "step": 1843 }, { "epoch": 0.7164469051261504, "grad_norm": 0.6328125, "learning_rate": 4.507220035341929e-05, "loss": 2.3615, "step": 1844 }, { "epoch": 0.7168354338165659, "grad_norm": 0.6171875, "learning_rate": 4.4958849691964675e-05, "loss": 2.2747, "step": 1845 }, { "epoch": 0.7172239625069814, "grad_norm": 0.66015625, "learning_rate": 4.484560039484836e-05, "loss": 2.359, "step": 1846 }, { "epoch": 0.7176124911973969, "grad_norm": 0.6875, "learning_rate": 4.47324526706314e-05, "loss": 2.4031, "step": 1847 }, { "epoch": 0.7180010198878123, "grad_norm": 0.66015625, "learning_rate": 4.4619406727687785e-05, "loss": 2.4118, "step": 1848 }, { "epoch": 0.7183895485782278, "grad_norm": 3.046875, "learning_rate": 4.450646277420407e-05, "loss": 2.4098, "step": 1849 }, { "epoch": 0.7187780772686433, "grad_norm": 0.61328125, "learning_rate": 4.439362101817898e-05, "loss": 2.3236, "step": 1850 }, { "epoch": 0.7191666059590588, "grad_norm": 0.62109375, "learning_rate": 4.4280881667423026e-05, "loss": 2.3833, "step": 1851 }, { "epoch": 0.7195551346494743, "grad_norm": 0.65234375, "learning_rate": 4.416824492955817e-05, "loss": 2.3257, "step": 1852 }, { "epoch": 0.7199436633398898, "grad_norm": 0.65625, "learning_rate": 4.40557110120173e-05, "loss": 2.3589, "step": 1853 }, { "epoch": 0.7203321920303052, "grad_norm": 0.65625, "learning_rate": 4.394328012204409e-05, "loss": 2.357, "step": 1854 }, { "epoch": 0.7207207207207207, "grad_norm": 0.6796875, "learning_rate": 4.383095246669232e-05, "loss": 2.3558, "step": 1855 }, { "epoch": 0.7211092494111362, "grad_norm": 0.6328125, "learning_rate": 4.371872825282578e-05, "loss": 2.3796, "step": 1856 }, { "epoch": 0.7214977781015517, "grad_norm": 0.6796875, "learning_rate": 4.3606607687117704e-05, "loss": 2.4108, "step": 1857 }, { "epoch": 0.7218863067919672, "grad_norm": 0.640625, "learning_rate": 4.349459097605047e-05, "loss": 2.3564, "step": 1858 }, { "epoch": 0.7222748354823827, "grad_norm": 0.62109375, "learning_rate": 4.338267832591519e-05, "loss": 2.3506, "step": 1859 }, { "epoch": 0.7226633641727981, "grad_norm": 0.6640625, "learning_rate": 4.327086994281132e-05, "loss": 2.3364, "step": 1860 }, { "epoch": 0.7230518928632136, "grad_norm": 0.625, "learning_rate": 4.31591660326463e-05, "loss": 2.4074, "step": 1861 }, { "epoch": 0.7234404215536291, "grad_norm": 3.0, "learning_rate": 4.30475668011352e-05, "loss": 2.3424, "step": 1862 }, { "epoch": 0.7238289502440446, "grad_norm": 0.640625, "learning_rate": 4.29360724538003e-05, "loss": 2.3817, "step": 1863 }, { "epoch": 0.7242174789344601, "grad_norm": 0.64453125, "learning_rate": 4.282468319597067e-05, "loss": 2.3692, "step": 1864 }, { "epoch": 0.7246060076248756, "grad_norm": 0.65625, "learning_rate": 4.2713399232781914e-05, "loss": 2.2731, "step": 1865 }, { "epoch": 0.724994536315291, "grad_norm": 0.63671875, "learning_rate": 4.2602220769175717e-05, "loss": 2.3326, "step": 1866 }, { "epoch": 0.7253830650057065, "grad_norm": 0.65234375, "learning_rate": 4.249114800989944e-05, "loss": 2.389, "step": 1867 }, { "epoch": 0.725771593696122, "grad_norm": 0.625, "learning_rate": 4.238018115950582e-05, "loss": 2.2927, "step": 1868 }, { "epoch": 0.7261601223865375, "grad_norm": 0.62890625, "learning_rate": 4.22693204223525e-05, "loss": 2.3935, "step": 1869 }, { "epoch": 0.726548651076953, "grad_norm": 0.59765625, "learning_rate": 4.215856600260176e-05, "loss": 2.3794, "step": 1870 }, { "epoch": 0.7269371797673685, "grad_norm": 0.6640625, "learning_rate": 4.204791810422008e-05, "loss": 2.3499, "step": 1871 }, { "epoch": 0.7273257084577839, "grad_norm": 0.62109375, "learning_rate": 4.1937376930977704e-05, "loss": 2.3803, "step": 1872 }, { "epoch": 0.7277142371481994, "grad_norm": 0.63671875, "learning_rate": 4.182694268644837e-05, "loss": 2.3551, "step": 1873 }, { "epoch": 0.7281027658386149, "grad_norm": 0.58984375, "learning_rate": 4.17166155740089e-05, "loss": 2.3986, "step": 1874 }, { "epoch": 0.7284912945290304, "grad_norm": 0.60546875, "learning_rate": 4.160639579683885e-05, "loss": 2.301, "step": 1875 }, { "epoch": 0.7288798232194459, "grad_norm": 0.62109375, "learning_rate": 4.149628355792005e-05, "loss": 2.3123, "step": 1876 }, { "epoch": 0.7292683519098614, "grad_norm": 0.62109375, "learning_rate": 4.1386279060036316e-05, "loss": 2.3621, "step": 1877 }, { "epoch": 0.7296568806002768, "grad_norm": 0.62109375, "learning_rate": 4.127638250577305e-05, "loss": 2.305, "step": 1878 }, { "epoch": 0.7300454092906923, "grad_norm": 0.625, "learning_rate": 4.1166594097516854e-05, "loss": 2.3822, "step": 1879 }, { "epoch": 0.7304339379811078, "grad_norm": 0.62890625, "learning_rate": 4.105691403745519e-05, "loss": 2.346, "step": 1880 }, { "epoch": 0.7308224666715233, "grad_norm": 0.64453125, "learning_rate": 4.0947342527575916e-05, "loss": 2.3148, "step": 1881 }, { "epoch": 0.7312109953619388, "grad_norm": 0.61328125, "learning_rate": 4.0837879769667045e-05, "loss": 2.3351, "step": 1882 }, { "epoch": 0.7315995240523543, "grad_norm": 0.5859375, "learning_rate": 4.072852596531631e-05, "loss": 2.3522, "step": 1883 }, { "epoch": 0.7319880527427697, "grad_norm": 0.67578125, "learning_rate": 4.06192813159108e-05, "loss": 2.4085, "step": 1884 }, { "epoch": 0.7323765814331852, "grad_norm": 0.62890625, "learning_rate": 4.051014602263652e-05, "loss": 2.3633, "step": 1885 }, { "epoch": 0.7327651101236007, "grad_norm": 0.64453125, "learning_rate": 4.0401120286478154e-05, "loss": 2.422, "step": 1886 }, { "epoch": 0.7331536388140162, "grad_norm": 0.625, "learning_rate": 4.029220430821862e-05, "loss": 2.4292, "step": 1887 }, { "epoch": 0.7335421675044317, "grad_norm": 0.6015625, "learning_rate": 4.018339828843868e-05, "loss": 2.3436, "step": 1888 }, { "epoch": 0.7339306961948472, "grad_norm": 0.625, "learning_rate": 4.007470242751654e-05, "loss": 2.4115, "step": 1889 }, { "epoch": 0.7343192248852626, "grad_norm": 0.625, "learning_rate": 3.996611692562764e-05, "loss": 2.4112, "step": 1890 }, { "epoch": 0.7347077535756781, "grad_norm": 0.625, "learning_rate": 3.985764198274414e-05, "loss": 2.3491, "step": 1891 }, { "epoch": 0.7350962822660936, "grad_norm": 0.60546875, "learning_rate": 3.9749277798634575e-05, "loss": 2.288, "step": 1892 }, { "epoch": 0.7354848109565091, "grad_norm": 0.6171875, "learning_rate": 3.964102457286353e-05, "loss": 2.3784, "step": 1893 }, { "epoch": 0.7358733396469246, "grad_norm": 0.6640625, "learning_rate": 3.9532882504791236e-05, "loss": 2.3957, "step": 1894 }, { "epoch": 0.7362618683373401, "grad_norm": 0.625, "learning_rate": 3.9424851793573224e-05, "loss": 2.358, "step": 1895 }, { "epoch": 0.7366503970277555, "grad_norm": 0.62109375, "learning_rate": 3.931693263815997e-05, "loss": 2.4146, "step": 1896 }, { "epoch": 0.737038925718171, "grad_norm": 0.65625, "learning_rate": 3.920912523729642e-05, "loss": 2.4071, "step": 1897 }, { "epoch": 0.7374274544085865, "grad_norm": 0.69921875, "learning_rate": 3.9101429789521827e-05, "loss": 2.2899, "step": 1898 }, { "epoch": 0.737815983099002, "grad_norm": 0.66015625, "learning_rate": 3.8993846493169164e-05, "loss": 2.3797, "step": 1899 }, { "epoch": 0.7382045117894175, "grad_norm": 0.625, "learning_rate": 3.888637554636493e-05, "loss": 2.4159, "step": 1900 }, { "epoch": 0.738593040479833, "grad_norm": 0.67578125, "learning_rate": 3.877901714702873e-05, "loss": 2.385, "step": 1901 }, { "epoch": 0.7389815691702484, "grad_norm": 0.6484375, "learning_rate": 3.867177149287286e-05, "loss": 2.2841, "step": 1902 }, { "epoch": 0.7393700978606639, "grad_norm": 0.609375, "learning_rate": 3.8564638781402e-05, "loss": 2.3939, "step": 1903 }, { "epoch": 0.7397586265510794, "grad_norm": 0.6171875, "learning_rate": 3.845761920991283e-05, "loss": 2.4283, "step": 1904 }, { "epoch": 0.7401471552414949, "grad_norm": 0.625, "learning_rate": 3.83507129754937e-05, "loss": 2.4112, "step": 1905 }, { "epoch": 0.7405356839319104, "grad_norm": 0.67578125, "learning_rate": 3.824392027502417e-05, "loss": 2.3811, "step": 1906 }, { "epoch": 0.7409242126223259, "grad_norm": 0.6328125, "learning_rate": 3.8137241305174834e-05, "loss": 2.3265, "step": 1907 }, { "epoch": 0.7413127413127413, "grad_norm": 0.703125, "learning_rate": 3.803067626240665e-05, "loss": 2.2776, "step": 1908 }, { "epoch": 0.7417012700031568, "grad_norm": 0.6796875, "learning_rate": 3.7924225342970945e-05, "loss": 2.4382, "step": 1909 }, { "epoch": 0.7420897986935723, "grad_norm": 0.62890625, "learning_rate": 3.781788874290879e-05, "loss": 2.3497, "step": 1910 }, { "epoch": 0.7424783273839878, "grad_norm": 0.59765625, "learning_rate": 3.771166665805075e-05, "loss": 2.3409, "step": 1911 }, { "epoch": 0.7428668560744033, "grad_norm": 0.60546875, "learning_rate": 3.760555928401647e-05, "loss": 2.3056, "step": 1912 }, { "epoch": 0.7432553847648187, "grad_norm": 0.640625, "learning_rate": 3.749956681621438e-05, "loss": 2.3703, "step": 1913 }, { "epoch": 0.7436439134552342, "grad_norm": 0.62890625, "learning_rate": 3.7393689449841286e-05, "loss": 2.3297, "step": 1914 }, { "epoch": 0.7440324421456497, "grad_norm": 0.63671875, "learning_rate": 3.728792737988204e-05, "loss": 2.3389, "step": 1915 }, { "epoch": 0.7444209708360652, "grad_norm": 0.6328125, "learning_rate": 3.718228080110906e-05, "loss": 2.3029, "step": 1916 }, { "epoch": 0.7448094995264807, "grad_norm": 0.64453125, "learning_rate": 3.707674990808219e-05, "loss": 2.3986, "step": 1917 }, { "epoch": 0.7451980282168962, "grad_norm": 0.59765625, "learning_rate": 3.69713348951482e-05, "loss": 2.2478, "step": 1918 }, { "epoch": 0.7455865569073116, "grad_norm": 0.625, "learning_rate": 3.686603595644044e-05, "loss": 2.4261, "step": 1919 }, { "epoch": 0.7459750855977271, "grad_norm": 0.6328125, "learning_rate": 3.6760853285878486e-05, "loss": 2.3709, "step": 1920 }, { "epoch": 0.7463636142881426, "grad_norm": 0.60546875, "learning_rate": 3.6655787077167825e-05, "loss": 2.3114, "step": 1921 }, { "epoch": 0.7467521429785581, "grad_norm": 0.60546875, "learning_rate": 3.655083752379943e-05, "loss": 2.3476, "step": 1922 }, { "epoch": 0.7471406716689736, "grad_norm": 0.609375, "learning_rate": 3.644600481904947e-05, "loss": 2.4215, "step": 1923 }, { "epoch": 0.7475292003593891, "grad_norm": 0.62890625, "learning_rate": 3.634128915597895e-05, "loss": 2.3943, "step": 1924 }, { "epoch": 0.7479177290498045, "grad_norm": 0.6328125, "learning_rate": 3.623669072743321e-05, "loss": 2.4181, "step": 1925 }, { "epoch": 0.74830625774022, "grad_norm": 0.66796875, "learning_rate": 3.613220972604182e-05, "loss": 2.3593, "step": 1926 }, { "epoch": 0.7486947864306355, "grad_norm": 0.6953125, "learning_rate": 3.602784634421804e-05, "loss": 2.3978, "step": 1927 }, { "epoch": 0.749083315121051, "grad_norm": 0.60546875, "learning_rate": 3.5923600774158525e-05, "loss": 2.3903, "step": 1928 }, { "epoch": 0.7494718438114665, "grad_norm": 0.609375, "learning_rate": 3.581947320784299e-05, "loss": 2.3521, "step": 1929 }, { "epoch": 0.749860372501882, "grad_norm": 0.671875, "learning_rate": 3.571546383703379e-05, "loss": 2.3271, "step": 1930 }, { "epoch": 0.7502489011922974, "grad_norm": 0.6796875, "learning_rate": 3.561157285327564e-05, "loss": 2.3692, "step": 1931 }, { "epoch": 0.7506374298827129, "grad_norm": 0.65234375, "learning_rate": 3.550780044789525e-05, "loss": 2.3958, "step": 1932 }, { "epoch": 0.7510259585731284, "grad_norm": 0.64453125, "learning_rate": 3.540414681200089e-05, "loss": 2.3941, "step": 1933 }, { "epoch": 0.7514144872635439, "grad_norm": 0.62890625, "learning_rate": 3.5300612136482146e-05, "loss": 2.3173, "step": 1934 }, { "epoch": 0.7518030159539594, "grad_norm": 0.68359375, "learning_rate": 3.519719661200954e-05, "loss": 2.4301, "step": 1935 }, { "epoch": 0.7521915446443749, "grad_norm": 0.58984375, "learning_rate": 3.5093900429034134e-05, "loss": 2.3899, "step": 1936 }, { "epoch": 0.7525800733347903, "grad_norm": 0.6171875, "learning_rate": 3.499072377778724e-05, "loss": 2.3445, "step": 1937 }, { "epoch": 0.7529686020252058, "grad_norm": 0.609375, "learning_rate": 3.488766684828e-05, "loss": 2.2973, "step": 1938 }, { "epoch": 0.7533571307156213, "grad_norm": 0.63671875, "learning_rate": 3.4784729830303096e-05, "loss": 2.3857, "step": 1939 }, { "epoch": 0.7537456594060368, "grad_norm": 0.671875, "learning_rate": 3.468191291342644e-05, "loss": 2.3077, "step": 1940 }, { "epoch": 0.7541341880964523, "grad_norm": 0.625, "learning_rate": 3.4579216286998615e-05, "loss": 2.3862, "step": 1941 }, { "epoch": 0.7545227167868678, "grad_norm": 0.62109375, "learning_rate": 3.4476640140146796e-05, "loss": 2.3252, "step": 1942 }, { "epoch": 0.7549112454772832, "grad_norm": 0.6328125, "learning_rate": 3.437418466177631e-05, "loss": 2.3137, "step": 1943 }, { "epoch": 0.7552997741676987, "grad_norm": 0.6171875, "learning_rate": 3.427185004057011e-05, "loss": 2.3269, "step": 1944 }, { "epoch": 0.7556883028581142, "grad_norm": 0.625, "learning_rate": 3.4169636464988695e-05, "loss": 2.3205, "step": 1945 }, { "epoch": 0.7560768315485297, "grad_norm": 0.6640625, "learning_rate": 3.4067544123269646e-05, "loss": 2.3266, "step": 1946 }, { "epoch": 0.7564653602389452, "grad_norm": 0.62109375, "learning_rate": 3.396557320342724e-05, "loss": 2.4334, "step": 1947 }, { "epoch": 0.7568538889293607, "grad_norm": 0.609375, "learning_rate": 3.386372389325213e-05, "loss": 2.3688, "step": 1948 }, { "epoch": 0.7572424176197761, "grad_norm": 0.62109375, "learning_rate": 3.3761996380311067e-05, "loss": 2.3101, "step": 1949 }, { "epoch": 0.7576309463101916, "grad_norm": 0.65625, "learning_rate": 3.3660390851946456e-05, "loss": 2.381, "step": 1950 }, { "epoch": 0.7580194750006071, "grad_norm": 0.609375, "learning_rate": 3.355890749527608e-05, "loss": 2.3385, "step": 1951 }, { "epoch": 0.7584080036910226, "grad_norm": 0.6328125, "learning_rate": 3.345754649719267e-05, "loss": 2.4517, "step": 1952 }, { "epoch": 0.7587965323814381, "grad_norm": 0.6171875, "learning_rate": 3.335630804436368e-05, "loss": 2.3311, "step": 1953 }, { "epoch": 0.7591850610718536, "grad_norm": 0.62890625, "learning_rate": 3.325519232323089e-05, "loss": 2.3541, "step": 1954 }, { "epoch": 0.759573589762269, "grad_norm": 0.640625, "learning_rate": 3.315419952001001e-05, "loss": 2.3937, "step": 1955 }, { "epoch": 0.7599621184526845, "grad_norm": 0.60546875, "learning_rate": 3.30533298206904e-05, "loss": 2.3497, "step": 1956 }, { "epoch": 0.7603506471431, "grad_norm": 0.62890625, "learning_rate": 3.295258341103475e-05, "loss": 2.3502, "step": 1957 }, { "epoch": 0.7607391758335155, "grad_norm": 0.6484375, "learning_rate": 3.285196047657865e-05, "loss": 2.4155, "step": 1958 }, { "epoch": 0.761127704523931, "grad_norm": 0.62109375, "learning_rate": 3.27514612026303e-05, "loss": 2.3778, "step": 1959 }, { "epoch": 0.7615162332143465, "grad_norm": 0.59765625, "learning_rate": 3.265108577427022e-05, "loss": 2.2699, "step": 1960 }, { "epoch": 0.7619047619047619, "grad_norm": 0.63671875, "learning_rate": 3.2550834376350745e-05, "loss": 2.3854, "step": 1961 }, { "epoch": 0.7622932905951774, "grad_norm": 0.640625, "learning_rate": 3.245070719349591e-05, "loss": 2.3895, "step": 1962 }, { "epoch": 0.7626818192855929, "grad_norm": 0.62109375, "learning_rate": 3.235070441010092e-05, "loss": 2.3569, "step": 1963 }, { "epoch": 0.7630703479760084, "grad_norm": 0.609375, "learning_rate": 3.225082621033193e-05, "loss": 2.3149, "step": 1964 }, { "epoch": 0.7634588766664239, "grad_norm": 0.6171875, "learning_rate": 3.215107277812563e-05, "loss": 2.3813, "step": 1965 }, { "epoch": 0.7638474053568394, "grad_norm": 0.65234375, "learning_rate": 3.2051444297188973e-05, "loss": 2.4437, "step": 1966 }, { "epoch": 0.7642359340472548, "grad_norm": 0.66015625, "learning_rate": 3.195194095099874e-05, "loss": 2.3745, "step": 1967 }, { "epoch": 0.7646244627376703, "grad_norm": 0.62890625, "learning_rate": 3.185256292280134e-05, "loss": 2.415, "step": 1968 }, { "epoch": 0.7650129914280858, "grad_norm": 0.61328125, "learning_rate": 3.1753310395612315e-05, "loss": 2.327, "step": 1969 }, { "epoch": 0.7654015201185013, "grad_norm": 0.671875, "learning_rate": 3.165418355221612e-05, "loss": 2.2945, "step": 1970 }, { "epoch": 0.7657900488089168, "grad_norm": 0.59765625, "learning_rate": 3.155518257516577e-05, "loss": 2.3694, "step": 1971 }, { "epoch": 0.7661785774993323, "grad_norm": 0.625, "learning_rate": 3.145630764678247e-05, "loss": 2.3799, "step": 1972 }, { "epoch": 0.7665671061897477, "grad_norm": 0.65625, "learning_rate": 3.135755894915526e-05, "loss": 2.3418, "step": 1973 }, { "epoch": 0.7669556348801632, "grad_norm": 0.609375, "learning_rate": 3.125893666414077e-05, "loss": 2.3477, "step": 1974 }, { "epoch": 0.7673441635705787, "grad_norm": 0.67578125, "learning_rate": 3.116044097336277e-05, "loss": 2.3125, "step": 1975 }, { "epoch": 0.7677326922609942, "grad_norm": 0.59375, "learning_rate": 3.1062072058211965e-05, "loss": 2.4135, "step": 1976 }, { "epoch": 0.7681212209514097, "grad_norm": 0.6171875, "learning_rate": 3.0963830099845484e-05, "loss": 2.3376, "step": 1977 }, { "epoch": 0.7685097496418252, "grad_norm": 0.61328125, "learning_rate": 3.086571527918673e-05, "loss": 2.2661, "step": 1978 }, { "epoch": 0.7688982783322406, "grad_norm": 0.59765625, "learning_rate": 3.0767727776924946e-05, "loss": 2.3262, "step": 1979 }, { "epoch": 0.7692868070226561, "grad_norm": 0.62109375, "learning_rate": 3.066986777351492e-05, "loss": 2.3649, "step": 1980 }, { "epoch": 0.7696753357130716, "grad_norm": 0.6328125, "learning_rate": 3.0572135449176606e-05, "loss": 2.3777, "step": 1981 }, { "epoch": 0.7700638644034871, "grad_norm": 0.59375, "learning_rate": 3.0474530983894857e-05, "loss": 2.3639, "step": 1982 }, { "epoch": 0.7704523930939026, "grad_norm": 0.66015625, "learning_rate": 3.037705455741903e-05, "loss": 2.3889, "step": 1983 }, { "epoch": 0.770840921784318, "grad_norm": 0.6328125, "learning_rate": 3.0279706349262727e-05, "loss": 2.4356, "step": 1984 }, { "epoch": 0.7712294504747335, "grad_norm": 0.62109375, "learning_rate": 3.0182486538703325e-05, "loss": 2.3708, "step": 1985 }, { "epoch": 0.771617979165149, "grad_norm": 0.609375, "learning_rate": 3.008539530478186e-05, "loss": 2.3059, "step": 1986 }, { "epoch": 0.7720065078555645, "grad_norm": 0.60546875, "learning_rate": 2.9988432826302537e-05, "loss": 2.365, "step": 1987 }, { "epoch": 0.77239503654598, "grad_norm": 0.625, "learning_rate": 2.9891599281832396e-05, "loss": 2.3592, "step": 1988 }, { "epoch": 0.7727835652363955, "grad_norm": 0.59765625, "learning_rate": 2.9794894849701083e-05, "loss": 2.3562, "step": 1989 }, { "epoch": 0.7731720939268109, "grad_norm": 0.6796875, "learning_rate": 2.9698319708000467e-05, "loss": 2.3798, "step": 1990 }, { "epoch": 0.7735606226172264, "grad_norm": 0.640625, "learning_rate": 2.96018740345843e-05, "loss": 2.3196, "step": 1991 }, { "epoch": 0.7739491513076419, "grad_norm": 0.6171875, "learning_rate": 2.9505558007067903e-05, "loss": 2.2949, "step": 1992 }, { "epoch": 0.7743376799980574, "grad_norm": 0.6328125, "learning_rate": 2.9409371802827857e-05, "loss": 2.3301, "step": 1993 }, { "epoch": 0.7747262086884729, "grad_norm": 0.6171875, "learning_rate": 2.9313315599001633e-05, "loss": 2.3685, "step": 1994 }, { "epoch": 0.7751147373788884, "grad_norm": 0.59375, "learning_rate": 2.9217389572487352e-05, "loss": 2.362, "step": 1995 }, { "epoch": 0.7755032660693038, "grad_norm": 0.7265625, "learning_rate": 2.9121593899943277e-05, "loss": 2.3637, "step": 1996 }, { "epoch": 0.7758917947597193, "grad_norm": 0.59375, "learning_rate": 2.9025928757787736e-05, "loss": 2.325, "step": 1997 }, { "epoch": 0.7762803234501348, "grad_norm": 0.59375, "learning_rate": 2.8930394322198608e-05, "loss": 2.272, "step": 1998 }, { "epoch": 0.7766688521405503, "grad_norm": 0.6484375, "learning_rate": 2.883499076911307e-05, "loss": 2.3975, "step": 1999 }, { "epoch": 0.7770573808309658, "grad_norm": 0.65625, "learning_rate": 2.8739718274227277e-05, "loss": 2.3345, "step": 2000 }, { "epoch": 0.7774459095213813, "grad_norm": 0.61328125, "learning_rate": 2.864457701299602e-05, "loss": 2.3376, "step": 2001 }, { "epoch": 0.7778344382117967, "grad_norm": 0.65234375, "learning_rate": 2.8549567160632397e-05, "loss": 2.3642, "step": 2002 }, { "epoch": 0.7782229669022122, "grad_norm": 0.625, "learning_rate": 2.845468889210752e-05, "loss": 2.3518, "step": 2003 }, { "epoch": 0.7786114955926277, "grad_norm": 0.61328125, "learning_rate": 2.8359942382150194e-05, "loss": 2.3389, "step": 2004 }, { "epoch": 0.7790000242830432, "grad_norm": 0.640625, "learning_rate": 2.826532780524649e-05, "loss": 2.3791, "step": 2005 }, { "epoch": 0.7793885529734587, "grad_norm": 0.60546875, "learning_rate": 2.8170845335639595e-05, "loss": 2.3456, "step": 2006 }, { "epoch": 0.7797770816638742, "grad_norm": 0.62109375, "learning_rate": 2.8076495147329375e-05, "loss": 2.3348, "step": 2007 }, { "epoch": 0.7801656103542896, "grad_norm": 0.6171875, "learning_rate": 2.79822774140721e-05, "loss": 2.3734, "step": 2008 }, { "epoch": 0.780554139044705, "grad_norm": 0.5859375, "learning_rate": 2.7888192309380102e-05, "loss": 2.3042, "step": 2009 }, { "epoch": 0.7809426677351206, "grad_norm": 0.62109375, "learning_rate": 2.7794240006521444e-05, "loss": 2.3699, "step": 2010 }, { "epoch": 0.7813311964255361, "grad_norm": 0.640625, "learning_rate": 2.7700420678519647e-05, "loss": 2.3301, "step": 2011 }, { "epoch": 0.7817197251159516, "grad_norm": 0.58984375, "learning_rate": 2.7606734498153365e-05, "loss": 2.3618, "step": 2012 }, { "epoch": 0.7821082538063671, "grad_norm": 0.65234375, "learning_rate": 2.7513181637955943e-05, "loss": 2.3884, "step": 2013 }, { "epoch": 0.7824967824967825, "grad_norm": 0.61328125, "learning_rate": 2.7419762270215312e-05, "loss": 2.3116, "step": 2014 }, { "epoch": 0.782885311187198, "grad_norm": 0.6171875, "learning_rate": 2.7326476566973512e-05, "loss": 2.3987, "step": 2015 }, { "epoch": 0.7832738398776135, "grad_norm": 0.625, "learning_rate": 2.7233324700026464e-05, "loss": 2.2948, "step": 2016 }, { "epoch": 0.783662368568029, "grad_norm": 0.60546875, "learning_rate": 2.7140306840923558e-05, "loss": 2.2709, "step": 2017 }, { "epoch": 0.7840508972584445, "grad_norm": 0.625, "learning_rate": 2.704742316096743e-05, "loss": 2.4295, "step": 2018 }, { "epoch": 0.78443942594886, "grad_norm": 0.62109375, "learning_rate": 2.6954673831213605e-05, "loss": 2.3513, "step": 2019 }, { "epoch": 0.7848279546392753, "grad_norm": 0.6328125, "learning_rate": 2.6862059022470198e-05, "loss": 2.3083, "step": 2020 }, { "epoch": 0.7852164833296909, "grad_norm": 0.6328125, "learning_rate": 2.6769578905297588e-05, "loss": 2.3593, "step": 2021 }, { "epoch": 0.7856050120201064, "grad_norm": 0.62890625, "learning_rate": 2.667723365000804e-05, "loss": 2.2834, "step": 2022 }, { "epoch": 0.7859935407105219, "grad_norm": 0.73046875, "learning_rate": 2.6585023426665535e-05, "loss": 2.3171, "step": 2023 }, { "epoch": 0.7863820694009374, "grad_norm": 0.6015625, "learning_rate": 2.6492948405085348e-05, "loss": 2.2423, "step": 2024 }, { "epoch": 0.7867705980913529, "grad_norm": 0.62890625, "learning_rate": 2.640100875483379e-05, "loss": 2.3983, "step": 2025 }, { "epoch": 0.7871591267817682, "grad_norm": 0.64453125, "learning_rate": 2.6309204645227825e-05, "loss": 2.3511, "step": 2026 }, { "epoch": 0.7875476554721837, "grad_norm": 0.6171875, "learning_rate": 2.621753624533484e-05, "loss": 2.2993, "step": 2027 }, { "epoch": 0.7879361841625993, "grad_norm": 0.58984375, "learning_rate": 2.6126003723972326e-05, "loss": 2.3746, "step": 2028 }, { "epoch": 0.7883247128530148, "grad_norm": 0.6015625, "learning_rate": 2.603460724970741e-05, "loss": 2.4112, "step": 2029 }, { "epoch": 0.7887132415434303, "grad_norm": 0.6796875, "learning_rate": 2.594334699085682e-05, "loss": 2.4033, "step": 2030 }, { "epoch": 0.7891017702338458, "grad_norm": 0.6015625, "learning_rate": 2.5852223115486385e-05, "loss": 2.2852, "step": 2031 }, { "epoch": 0.7894902989242611, "grad_norm": 0.61328125, "learning_rate": 2.5761235791410698e-05, "loss": 2.3602, "step": 2032 }, { "epoch": 0.7898788276146766, "grad_norm": 0.60546875, "learning_rate": 2.567038518619297e-05, "loss": 2.3884, "step": 2033 }, { "epoch": 0.7902673563050922, "grad_norm": 0.64453125, "learning_rate": 2.5579671467144585e-05, "loss": 2.4177, "step": 2034 }, { "epoch": 0.7906558849955077, "grad_norm": 0.625, "learning_rate": 2.5489094801324854e-05, "loss": 2.3687, "step": 2035 }, { "epoch": 0.7910444136859232, "grad_norm": 0.60546875, "learning_rate": 2.5398655355540658e-05, "loss": 2.3799, "step": 2036 }, { "epoch": 0.7914329423763387, "grad_norm": 0.59375, "learning_rate": 2.530835329634622e-05, "loss": 2.3293, "step": 2037 }, { "epoch": 0.791821471066754, "grad_norm": 0.60546875, "learning_rate": 2.5218188790042706e-05, "loss": 2.3401, "step": 2038 }, { "epoch": 0.7922099997571695, "grad_norm": 0.609375, "learning_rate": 2.5128162002678024e-05, "loss": 2.3879, "step": 2039 }, { "epoch": 0.792598528447585, "grad_norm": 0.5859375, "learning_rate": 2.503827310004635e-05, "loss": 2.3356, "step": 2040 }, { "epoch": 0.7929870571380006, "grad_norm": 0.625, "learning_rate": 2.4948522247688023e-05, "loss": 2.3512, "step": 2041 }, { "epoch": 0.793375585828416, "grad_norm": 1.0546875, "learning_rate": 2.4858909610889114e-05, "loss": 2.3043, "step": 2042 }, { "epoch": 0.7937641145188316, "grad_norm": 0.63671875, "learning_rate": 2.476943535468117e-05, "loss": 2.3064, "step": 2043 }, { "epoch": 0.794152643209247, "grad_norm": 0.609375, "learning_rate": 2.4680099643840882e-05, "loss": 2.3251, "step": 2044 }, { "epoch": 0.7945411718996624, "grad_norm": 0.61328125, "learning_rate": 2.459090264288979e-05, "loss": 2.3401, "step": 2045 }, { "epoch": 0.794929700590078, "grad_norm": 0.62890625, "learning_rate": 2.4501844516094007e-05, "loss": 2.2794, "step": 2046 }, { "epoch": 0.7953182292804934, "grad_norm": 0.6171875, "learning_rate": 2.4412925427463874e-05, "loss": 2.3531, "step": 2047 }, { "epoch": 0.795706757970909, "grad_norm": 0.63671875, "learning_rate": 2.4324145540753697e-05, "loss": 2.3799, "step": 2048 }, { "epoch": 0.7960952866613243, "grad_norm": 0.609375, "learning_rate": 2.4235505019461367e-05, "loss": 2.3758, "step": 2049 }, { "epoch": 0.7964838153517398, "grad_norm": 0.62109375, "learning_rate": 2.4147004026828192e-05, "loss": 2.3625, "step": 2050 }, { "epoch": 0.7968723440421553, "grad_norm": 0.61328125, "learning_rate": 2.4058642725838486e-05, "loss": 2.3455, "step": 2051 }, { "epoch": 0.7972608727325708, "grad_norm": 0.6328125, "learning_rate": 2.3970421279219323e-05, "loss": 2.3983, "step": 2052 }, { "epoch": 0.7976494014229863, "grad_norm": 0.5859375, "learning_rate": 2.3882339849440205e-05, "loss": 2.3582, "step": 2053 }, { "epoch": 0.7980379301134018, "grad_norm": 0.61328125, "learning_rate": 2.3794398598712786e-05, "loss": 2.3543, "step": 2054 }, { "epoch": 0.7984264588038172, "grad_norm": 0.6171875, "learning_rate": 2.370659768899056e-05, "loss": 2.3625, "step": 2055 }, { "epoch": 0.7988149874942327, "grad_norm": 0.64453125, "learning_rate": 2.36189372819686e-05, "loss": 2.3334, "step": 2056 }, { "epoch": 0.7992035161846482, "grad_norm": 0.6328125, "learning_rate": 2.3531417539083134e-05, "loss": 2.429, "step": 2057 }, { "epoch": 0.7995920448750637, "grad_norm": 0.6171875, "learning_rate": 2.3444038621511433e-05, "loss": 2.3277, "step": 2058 }, { "epoch": 0.7999805735654792, "grad_norm": 0.609375, "learning_rate": 2.33568006901714e-05, "loss": 2.3584, "step": 2059 }, { "epoch": 0.8003691022558947, "grad_norm": 0.62109375, "learning_rate": 2.3269703905721285e-05, "loss": 2.4414, "step": 2060 }, { "epoch": 0.8007576309463101, "grad_norm": 0.6328125, "learning_rate": 2.318274842855941e-05, "loss": 2.3893, "step": 2061 }, { "epoch": 0.8011461596367256, "grad_norm": 0.625, "learning_rate": 2.309593441882385e-05, "loss": 2.2949, "step": 2062 }, { "epoch": 0.8015346883271411, "grad_norm": 0.58984375, "learning_rate": 2.3009262036392177e-05, "loss": 2.373, "step": 2063 }, { "epoch": 0.8019232170175566, "grad_norm": 0.578125, "learning_rate": 2.2922731440881128e-05, "loss": 2.392, "step": 2064 }, { "epoch": 0.8023117457079721, "grad_norm": 0.609375, "learning_rate": 2.2836342791646325e-05, "loss": 2.3395, "step": 2065 }, { "epoch": 0.8027002743983876, "grad_norm": 0.61328125, "learning_rate": 2.275009624778195e-05, "loss": 2.3918, "step": 2066 }, { "epoch": 0.803088803088803, "grad_norm": 0.6171875, "learning_rate": 2.2663991968120512e-05, "loss": 2.3602, "step": 2067 }, { "epoch": 0.8034773317792185, "grad_norm": 0.6328125, "learning_rate": 2.257803011123254e-05, "loss": 2.3232, "step": 2068 }, { "epoch": 0.803865860469634, "grad_norm": 0.625, "learning_rate": 2.2492210835426253e-05, "loss": 2.327, "step": 2069 }, { "epoch": 0.8042543891600495, "grad_norm": 0.61328125, "learning_rate": 2.2406534298747293e-05, "loss": 2.3697, "step": 2070 }, { "epoch": 0.804642917850465, "grad_norm": 0.59765625, "learning_rate": 2.2321000658978465e-05, "loss": 2.3967, "step": 2071 }, { "epoch": 0.8050314465408805, "grad_norm": 0.609375, "learning_rate": 2.2235610073639335e-05, "loss": 2.326, "step": 2072 }, { "epoch": 0.8054199752312959, "grad_norm": 0.6171875, "learning_rate": 2.2150362699986083e-05, "loss": 2.3649, "step": 2073 }, { "epoch": 0.8058085039217114, "grad_norm": 0.625, "learning_rate": 2.206525869501114e-05, "loss": 2.4364, "step": 2074 }, { "epoch": 0.8061970326121269, "grad_norm": 0.5859375, "learning_rate": 2.198029821544292e-05, "loss": 2.3134, "step": 2075 }, { "epoch": 0.8065855613025424, "grad_norm": 0.60546875, "learning_rate": 2.1895481417745457e-05, "loss": 2.3823, "step": 2076 }, { "epoch": 0.8069740899929579, "grad_norm": 0.62890625, "learning_rate": 2.1810808458118247e-05, "loss": 2.4092, "step": 2077 }, { "epoch": 0.8073626186833734, "grad_norm": 0.64453125, "learning_rate": 2.172627949249586e-05, "loss": 2.3765, "step": 2078 }, { "epoch": 0.8077511473737888, "grad_norm": 0.609375, "learning_rate": 2.1641894676547703e-05, "loss": 2.3685, "step": 2079 }, { "epoch": 0.8081396760642043, "grad_norm": 0.61328125, "learning_rate": 2.15576541656777e-05, "loss": 2.2613, "step": 2080 }, { "epoch": 0.8085282047546198, "grad_norm": 0.63671875, "learning_rate": 2.1473558115024027e-05, "loss": 2.2979, "step": 2081 }, { "epoch": 0.8089167334450353, "grad_norm": 0.609375, "learning_rate": 2.1389606679458828e-05, "loss": 2.3692, "step": 2082 }, { "epoch": 0.8093052621354508, "grad_norm": 0.6015625, "learning_rate": 2.1305800013587908e-05, "loss": 2.3236, "step": 2083 }, { "epoch": 0.8096937908258663, "grad_norm": 0.59375, "learning_rate": 2.122213827175048e-05, "loss": 2.3073, "step": 2084 }, { "epoch": 0.8100823195162817, "grad_norm": 0.63671875, "learning_rate": 2.113862160801883e-05, "loss": 2.3627, "step": 2085 }, { "epoch": 0.8104708482066972, "grad_norm": 0.60546875, "learning_rate": 2.1055250176198094e-05, "loss": 2.2891, "step": 2086 }, { "epoch": 0.8108593768971127, "grad_norm": 0.59765625, "learning_rate": 2.097202412982595e-05, "loss": 2.3075, "step": 2087 }, { "epoch": 0.8112479055875282, "grad_norm": 0.59375, "learning_rate": 2.088894362217233e-05, "loss": 2.3189, "step": 2088 }, { "epoch": 0.8116364342779437, "grad_norm": 0.62890625, "learning_rate": 2.080600880623913e-05, "loss": 2.2979, "step": 2089 }, { "epoch": 0.8120249629683592, "grad_norm": 0.59375, "learning_rate": 2.0723219834759945e-05, "loss": 2.3131, "step": 2090 }, { "epoch": 0.8124134916587746, "grad_norm": 0.5859375, "learning_rate": 2.0640576860199778e-05, "loss": 2.3436, "step": 2091 }, { "epoch": 0.8128020203491901, "grad_norm": 0.60546875, "learning_rate": 2.05580800347548e-05, "loss": 2.4079, "step": 2092 }, { "epoch": 0.8131905490396056, "grad_norm": 0.6015625, "learning_rate": 2.0475729510351937e-05, "loss": 2.3397, "step": 2093 }, { "epoch": 0.8135790777300211, "grad_norm": 0.60546875, "learning_rate": 2.0393525438648774e-05, "loss": 2.4194, "step": 2094 }, { "epoch": 0.8139676064204366, "grad_norm": 0.62109375, "learning_rate": 2.031146797103317e-05, "loss": 2.3762, "step": 2095 }, { "epoch": 0.8143561351108521, "grad_norm": 0.5859375, "learning_rate": 2.0229557258622977e-05, "loss": 2.402, "step": 2096 }, { "epoch": 0.8147446638012675, "grad_norm": 0.61328125, "learning_rate": 2.0147793452265796e-05, "loss": 2.3029, "step": 2097 }, { "epoch": 0.815133192491683, "grad_norm": 0.65625, "learning_rate": 2.0066176702538676e-05, "loss": 2.3814, "step": 2098 }, { "epoch": 0.8155217211820985, "grad_norm": 0.6171875, "learning_rate": 1.998470715974784e-05, "loss": 2.3413, "step": 2099 }, { "epoch": 0.815910249872514, "grad_norm": 0.61328125, "learning_rate": 1.990338497392845e-05, "loss": 2.3015, "step": 2100 }, { "epoch": 0.8162987785629295, "grad_norm": 0.6015625, "learning_rate": 1.9822210294844222e-05, "loss": 2.355, "step": 2101 }, { "epoch": 0.816687307253345, "grad_norm": 0.60546875, "learning_rate": 1.9741183271987284e-05, "loss": 2.4106, "step": 2102 }, { "epoch": 0.8170758359437604, "grad_norm": 0.5859375, "learning_rate": 1.9660304054577816e-05, "loss": 2.2703, "step": 2103 }, { "epoch": 0.8174643646341759, "grad_norm": 0.59375, "learning_rate": 1.9579572791563804e-05, "loss": 2.3253, "step": 2104 }, { "epoch": 0.8178528933245914, "grad_norm": 0.61328125, "learning_rate": 1.9498989631620767e-05, "loss": 2.3657, "step": 2105 }, { "epoch": 0.8182414220150069, "grad_norm": 0.6171875, "learning_rate": 1.941855472315145e-05, "loss": 2.4544, "step": 2106 }, { "epoch": 0.8186299507054224, "grad_norm": 0.6015625, "learning_rate": 1.9338268214285604e-05, "loss": 2.36, "step": 2107 }, { "epoch": 0.8190184793958379, "grad_norm": 0.66015625, "learning_rate": 1.925813025287968e-05, "loss": 2.3582, "step": 2108 }, { "epoch": 0.8194070080862533, "grad_norm": 0.6328125, "learning_rate": 1.9178140986516568e-05, "loss": 2.303, "step": 2109 }, { "epoch": 0.8197955367766688, "grad_norm": 0.63671875, "learning_rate": 1.9098300562505266e-05, "loss": 2.3696, "step": 2110 }, { "epoch": 0.8201840654670843, "grad_norm": 0.61328125, "learning_rate": 1.9018609127880727e-05, "loss": 2.3677, "step": 2111 }, { "epoch": 0.8205725941574998, "grad_norm": 0.62890625, "learning_rate": 1.8939066829403508e-05, "loss": 2.3268, "step": 2112 }, { "epoch": 0.8209611228479153, "grad_norm": 0.6015625, "learning_rate": 1.8859673813559497e-05, "loss": 2.3276, "step": 2113 }, { "epoch": 0.8213496515383308, "grad_norm": 0.6015625, "learning_rate": 1.8780430226559686e-05, "loss": 2.3263, "step": 2114 }, { "epoch": 0.8217381802287462, "grad_norm": 0.61328125, "learning_rate": 1.8701336214339883e-05, "loss": 2.3716, "step": 2115 }, { "epoch": 0.8221267089191617, "grad_norm": 0.6796875, "learning_rate": 1.8622391922560377e-05, "loss": 2.4096, "step": 2116 }, { "epoch": 0.8225152376095772, "grad_norm": 0.6171875, "learning_rate": 1.8543597496605793e-05, "loss": 2.3107, "step": 2117 }, { "epoch": 0.8229037662999927, "grad_norm": 0.6484375, "learning_rate": 1.8464953081584735e-05, "loss": 2.3928, "step": 2118 }, { "epoch": 0.8232922949904082, "grad_norm": 0.62109375, "learning_rate": 1.83864588223296e-05, "loss": 2.3962, "step": 2119 }, { "epoch": 0.8236808236808236, "grad_norm": 0.625, "learning_rate": 1.830811486339613e-05, "loss": 2.3183, "step": 2120 }, { "epoch": 0.8240693523712391, "grad_norm": 0.59375, "learning_rate": 1.8229921349063396e-05, "loss": 2.3754, "step": 2121 }, { "epoch": 0.8244578810616546, "grad_norm": 0.59765625, "learning_rate": 1.8151878423333346e-05, "loss": 2.3284, "step": 2122 }, { "epoch": 0.8248464097520701, "grad_norm": 0.640625, "learning_rate": 1.807398622993064e-05, "loss": 2.3157, "step": 2123 }, { "epoch": 0.8252349384424856, "grad_norm": 0.66796875, "learning_rate": 1.7996244912302317e-05, "loss": 2.3063, "step": 2124 }, { "epoch": 0.8256234671329011, "grad_norm": 0.6015625, "learning_rate": 1.791865461361758e-05, "loss": 2.3508, "step": 2125 }, { "epoch": 0.8260119958233165, "grad_norm": 0.62890625, "learning_rate": 1.7841215476767493e-05, "loss": 2.3617, "step": 2126 }, { "epoch": 0.826400524513732, "grad_norm": 0.58984375, "learning_rate": 1.7763927644364765e-05, "loss": 2.3354, "step": 2127 }, { "epoch": 0.8267890532041475, "grad_norm": 0.58203125, "learning_rate": 1.7686791258743473e-05, "loss": 2.2842, "step": 2128 }, { "epoch": 0.827177581894563, "grad_norm": 0.59765625, "learning_rate": 1.76098064619587e-05, "loss": 2.2952, "step": 2129 }, { "epoch": 0.8275661105849785, "grad_norm": 0.625, "learning_rate": 1.7532973395786467e-05, "loss": 2.3243, "step": 2130 }, { "epoch": 0.827954639275394, "grad_norm": 0.63671875, "learning_rate": 1.7456292201723325e-05, "loss": 2.3563, "step": 2131 }, { "epoch": 0.8283431679658094, "grad_norm": 0.61328125, "learning_rate": 1.7379763020986117e-05, "loss": 2.3241, "step": 2132 }, { "epoch": 0.8287316966562249, "grad_norm": 0.61328125, "learning_rate": 1.7303385994511778e-05, "loss": 2.3951, "step": 2133 }, { "epoch": 0.8291202253466404, "grad_norm": 0.62109375, "learning_rate": 1.722716126295699e-05, "loss": 2.3436, "step": 2134 }, { "epoch": 0.8295087540370559, "grad_norm": 0.609375, "learning_rate": 1.7151088966698004e-05, "loss": 2.2888, "step": 2135 }, { "epoch": 0.8298972827274714, "grad_norm": 0.6171875, "learning_rate": 1.7075169245830348e-05, "loss": 2.3908, "step": 2136 }, { "epoch": 0.8302858114178869, "grad_norm": 0.66796875, "learning_rate": 1.6999402240168505e-05, "loss": 2.4087, "step": 2137 }, { "epoch": 0.8306743401083023, "grad_norm": 0.60546875, "learning_rate": 1.6923788089245762e-05, "loss": 2.3647, "step": 2138 }, { "epoch": 0.8310628687987178, "grad_norm": 0.6171875, "learning_rate": 1.684832693231393e-05, "loss": 2.3629, "step": 2139 }, { "epoch": 0.8314513974891333, "grad_norm": 0.6171875, "learning_rate": 1.6773018908343018e-05, "loss": 2.3733, "step": 2140 }, { "epoch": 0.8318399261795488, "grad_norm": 0.64453125, "learning_rate": 1.669786415602105e-05, "loss": 2.3067, "step": 2141 }, { "epoch": 0.8322284548699643, "grad_norm": 0.6328125, "learning_rate": 1.662286281375377e-05, "loss": 2.3411, "step": 2142 }, { "epoch": 0.8326169835603798, "grad_norm": 0.6171875, "learning_rate": 1.654801501966442e-05, "loss": 2.3791, "step": 2143 }, { "epoch": 0.8330055122507952, "grad_norm": 0.7890625, "learning_rate": 1.6473320911593448e-05, "loss": 2.3025, "step": 2144 }, { "epoch": 0.8333940409412107, "grad_norm": 0.625, "learning_rate": 1.63987806270983e-05, "loss": 2.3023, "step": 2145 }, { "epoch": 0.8337825696316262, "grad_norm": 0.58984375, "learning_rate": 1.6324394303453073e-05, "loss": 2.3044, "step": 2146 }, { "epoch": 0.8341710983220417, "grad_norm": 0.58984375, "learning_rate": 1.625016207764839e-05, "loss": 2.3697, "step": 2147 }, { "epoch": 0.8345596270124572, "grad_norm": 0.59375, "learning_rate": 1.6176084086391074e-05, "loss": 2.3098, "step": 2148 }, { "epoch": 0.8349481557028727, "grad_norm": 0.61328125, "learning_rate": 1.6102160466103898e-05, "loss": 2.3355, "step": 2149 }, { "epoch": 0.8353366843932881, "grad_norm": 0.625, "learning_rate": 1.6028391352925354e-05, "loss": 2.3012, "step": 2150 }, { "epoch": 0.8357252130837036, "grad_norm": 0.6171875, "learning_rate": 1.5954776882709365e-05, "loss": 2.3258, "step": 2151 }, { "epoch": 0.8361137417741191, "grad_norm": 0.609375, "learning_rate": 1.5881317191025113e-05, "loss": 2.339, "step": 2152 }, { "epoch": 0.8365022704645346, "grad_norm": 0.63671875, "learning_rate": 1.5808012413156713e-05, "loss": 2.3705, "step": 2153 }, { "epoch": 0.8368907991549501, "grad_norm": 0.609375, "learning_rate": 1.5734862684102934e-05, "loss": 2.2996, "step": 2154 }, { "epoch": 0.8372793278453656, "grad_norm": 0.58984375, "learning_rate": 1.5661868138577096e-05, "loss": 2.3147, "step": 2155 }, { "epoch": 0.837667856535781, "grad_norm": 0.58984375, "learning_rate": 1.5589028911006675e-05, "loss": 2.3608, "step": 2156 }, { "epoch": 0.8380563852261965, "grad_norm": 0.62109375, "learning_rate": 1.5516345135533138e-05, "loss": 2.4363, "step": 2157 }, { "epoch": 0.838444913916612, "grad_norm": 0.6171875, "learning_rate": 1.5443816946011657e-05, "loss": 2.35, "step": 2158 }, { "epoch": 0.8388334426070275, "grad_norm": 0.609375, "learning_rate": 1.53714444760109e-05, "loss": 2.4453, "step": 2159 }, { "epoch": 0.839221971297443, "grad_norm": 0.59375, "learning_rate": 1.5299227858812693e-05, "loss": 2.4134, "step": 2160 }, { "epoch": 0.8396104999878585, "grad_norm": 0.62109375, "learning_rate": 1.5227167227411909e-05, "loss": 2.3808, "step": 2161 }, { "epoch": 0.8399990286782739, "grad_norm": 0.6171875, "learning_rate": 1.5155262714516138e-05, "loss": 2.446, "step": 2162 }, { "epoch": 0.8403875573686894, "grad_norm": 0.63671875, "learning_rate": 1.508351445254549e-05, "loss": 2.4715, "step": 2163 }, { "epoch": 0.8407760860591049, "grad_norm": 0.640625, "learning_rate": 1.501192257363222e-05, "loss": 2.3122, "step": 2164 }, { "epoch": 0.8411646147495204, "grad_norm": 0.640625, "learning_rate": 1.494048720962069e-05, "loss": 2.3619, "step": 2165 }, { "epoch": 0.8415531434399359, "grad_norm": 0.65234375, "learning_rate": 1.4869208492066989e-05, "loss": 2.363, "step": 2166 }, { "epoch": 0.8419416721303514, "grad_norm": 0.61328125, "learning_rate": 1.4798086552238732e-05, "loss": 2.3153, "step": 2167 }, { "epoch": 0.8423302008207668, "grad_norm": 0.609375, "learning_rate": 1.4727121521114784e-05, "loss": 2.2836, "step": 2168 }, { "epoch": 0.8427187295111823, "grad_norm": 0.625, "learning_rate": 1.4656313529385068e-05, "loss": 2.345, "step": 2169 }, { "epoch": 0.8431072582015978, "grad_norm": 0.62890625, "learning_rate": 1.4585662707450299e-05, "loss": 2.3396, "step": 2170 }, { "epoch": 0.8434957868920133, "grad_norm": 0.61328125, "learning_rate": 1.4515169185421751e-05, "loss": 2.4191, "step": 2171 }, { "epoch": 0.8438843155824288, "grad_norm": 0.609375, "learning_rate": 1.4444833093121025e-05, "loss": 2.3053, "step": 2172 }, { "epoch": 0.8442728442728443, "grad_norm": 0.61328125, "learning_rate": 1.4374654560079725e-05, "loss": 2.2813, "step": 2173 }, { "epoch": 0.8446613729632597, "grad_norm": 0.59765625, "learning_rate": 1.4304633715539384e-05, "loss": 2.3522, "step": 2174 }, { "epoch": 0.8450499016536752, "grad_norm": 0.62109375, "learning_rate": 1.4234770688451082e-05, "loss": 2.4164, "step": 2175 }, { "epoch": 0.8454384303440907, "grad_norm": 0.61328125, "learning_rate": 1.416506560747528e-05, "loss": 2.3329, "step": 2176 }, { "epoch": 0.8458269590345062, "grad_norm": 0.61328125, "learning_rate": 1.4095518600981562e-05, "loss": 2.3285, "step": 2177 }, { "epoch": 0.8462154877249217, "grad_norm": 0.62109375, "learning_rate": 1.4026129797048393e-05, "loss": 2.2963, "step": 2178 }, { "epoch": 0.8466040164153372, "grad_norm": 0.6640625, "learning_rate": 1.3956899323462891e-05, "loss": 2.3549, "step": 2179 }, { "epoch": 0.8469925451057526, "grad_norm": 0.609375, "learning_rate": 1.3887827307720636e-05, "loss": 2.3483, "step": 2180 }, { "epoch": 0.8473810737961681, "grad_norm": 0.62109375, "learning_rate": 1.3818913877025286e-05, "loss": 2.4284, "step": 2181 }, { "epoch": 0.8477696024865836, "grad_norm": 0.60546875, "learning_rate": 1.3750159158288546e-05, "loss": 2.3375, "step": 2182 }, { "epoch": 0.8481581311769991, "grad_norm": 0.58984375, "learning_rate": 1.3681563278129794e-05, "loss": 2.3789, "step": 2183 }, { "epoch": 0.8485466598674146, "grad_norm": 0.609375, "learning_rate": 1.3613126362875906e-05, "loss": 2.3979, "step": 2184 }, { "epoch": 0.8489351885578301, "grad_norm": 0.62890625, "learning_rate": 1.3544848538560972e-05, "loss": 2.3132, "step": 2185 }, { "epoch": 0.8493237172482455, "grad_norm": 0.6484375, "learning_rate": 1.3476729930926147e-05, "loss": 2.3686, "step": 2186 }, { "epoch": 0.849712245938661, "grad_norm": 0.6171875, "learning_rate": 1.3408770665419335e-05, "loss": 2.4061, "step": 2187 }, { "epoch": 0.8501007746290765, "grad_norm": 0.6171875, "learning_rate": 1.3340970867195013e-05, "loss": 2.3404, "step": 2188 }, { "epoch": 0.850489303319492, "grad_norm": 0.58984375, "learning_rate": 1.3273330661113992e-05, "loss": 2.3503, "step": 2189 }, { "epoch": 0.8508778320099075, "grad_norm": 0.625, "learning_rate": 1.3205850171743106e-05, "loss": 2.3556, "step": 2190 }, { "epoch": 0.8512663607003229, "grad_norm": 0.61328125, "learning_rate": 1.3138529523355148e-05, "loss": 2.3403, "step": 2191 }, { "epoch": 0.8516548893907384, "grad_norm": 0.6015625, "learning_rate": 1.3071368839928488e-05, "loss": 2.3289, "step": 2192 }, { "epoch": 0.8520434180811539, "grad_norm": 0.6171875, "learning_rate": 1.3004368245146915e-05, "loss": 2.3767, "step": 2193 }, { "epoch": 0.8524319467715694, "grad_norm": 0.62890625, "learning_rate": 1.2937527862399424e-05, "loss": 2.3762, "step": 2194 }, { "epoch": 0.8528204754619849, "grad_norm": 0.59375, "learning_rate": 1.2870847814779907e-05, "loss": 2.3307, "step": 2195 }, { "epoch": 0.8532090041524004, "grad_norm": 0.609375, "learning_rate": 1.2804328225087048e-05, "loss": 2.3917, "step": 2196 }, { "epoch": 0.8535975328428158, "grad_norm": 0.6328125, "learning_rate": 1.2737969215823986e-05, "loss": 2.4013, "step": 2197 }, { "epoch": 0.8539860615332313, "grad_norm": 0.6015625, "learning_rate": 1.2671770909198122e-05, "loss": 2.3378, "step": 2198 }, { "epoch": 0.8543745902236468, "grad_norm": 0.61328125, "learning_rate": 1.2605733427120925e-05, "loss": 2.3566, "step": 2199 }, { "epoch": 0.8547631189140623, "grad_norm": 0.59375, "learning_rate": 1.2539856891207712e-05, "loss": 2.4097, "step": 2200 }, { "epoch": 0.8551516476044778, "grad_norm": 0.6171875, "learning_rate": 1.2474141422777363e-05, "loss": 2.3139, "step": 2201 }, { "epoch": 0.8555401762948933, "grad_norm": 0.625, "learning_rate": 1.2408587142852179e-05, "loss": 2.3955, "step": 2202 }, { "epoch": 0.8559287049853087, "grad_norm": 0.5859375, "learning_rate": 1.2343194172157535e-05, "loss": 2.3451, "step": 2203 }, { "epoch": 0.8563172336757242, "grad_norm": 0.60546875, "learning_rate": 1.2277962631121809e-05, "loss": 2.3197, "step": 2204 }, { "epoch": 0.8567057623661397, "grad_norm": 0.58203125, "learning_rate": 1.221289263987606e-05, "loss": 2.319, "step": 2205 }, { "epoch": 0.8570942910565552, "grad_norm": 0.59375, "learning_rate": 1.2147984318253857e-05, "loss": 2.3246, "step": 2206 }, { "epoch": 0.8574828197469707, "grad_norm": 0.59375, "learning_rate": 1.2083237785791003e-05, "loss": 2.3354, "step": 2207 }, { "epoch": 0.8578713484373862, "grad_norm": 0.65234375, "learning_rate": 1.201865316172539e-05, "loss": 2.4292, "step": 2208 }, { "epoch": 0.8582598771278016, "grad_norm": 0.60546875, "learning_rate": 1.1954230564996682e-05, "loss": 2.3665, "step": 2209 }, { "epoch": 0.8586484058182171, "grad_norm": 0.60546875, "learning_rate": 1.1889970114246196e-05, "loss": 2.2845, "step": 2210 }, { "epoch": 0.8590369345086326, "grad_norm": 0.6171875, "learning_rate": 1.1825871927816635e-05, "loss": 2.3748, "step": 2211 }, { "epoch": 0.8594254631990481, "grad_norm": 0.6171875, "learning_rate": 1.176193612375186e-05, "loss": 2.3349, "step": 2212 }, { "epoch": 0.8598139918894636, "grad_norm": 0.625, "learning_rate": 1.1698162819796698e-05, "loss": 2.3285, "step": 2213 }, { "epoch": 0.8602025205798791, "grad_norm": 0.59375, "learning_rate": 1.1634552133396704e-05, "loss": 2.3757, "step": 2214 }, { "epoch": 0.8605910492702945, "grad_norm": 0.5859375, "learning_rate": 1.1571104181697957e-05, "loss": 2.3622, "step": 2215 }, { "epoch": 0.86097957796071, "grad_norm": 0.625, "learning_rate": 1.1507819081546878e-05, "loss": 2.3709, "step": 2216 }, { "epoch": 0.8613681066511255, "grad_norm": 0.59375, "learning_rate": 1.1444696949489908e-05, "loss": 2.3431, "step": 2217 }, { "epoch": 0.861756635341541, "grad_norm": 0.59765625, "learning_rate": 1.1381737901773405e-05, "loss": 2.3009, "step": 2218 }, { "epoch": 0.8621451640319565, "grad_norm": 0.61328125, "learning_rate": 1.1318942054343395e-05, "loss": 2.3848, "step": 2219 }, { "epoch": 0.862533692722372, "grad_norm": 0.62109375, "learning_rate": 1.1256309522845355e-05, "loss": 2.3062, "step": 2220 }, { "epoch": 0.8629222214127874, "grad_norm": 0.6484375, "learning_rate": 1.1193840422623981e-05, "loss": 2.3867, "step": 2221 }, { "epoch": 0.8633107501032029, "grad_norm": 0.6328125, "learning_rate": 1.1131534868722993e-05, "loss": 2.4272, "step": 2222 }, { "epoch": 0.8636992787936184, "grad_norm": 0.60546875, "learning_rate": 1.106939297588494e-05, "loss": 2.3541, "step": 2223 }, { "epoch": 0.8640878074840339, "grad_norm": 0.6015625, "learning_rate": 1.1007414858550968e-05, "loss": 2.3333, "step": 2224 }, { "epoch": 0.8644763361744494, "grad_norm": 0.62109375, "learning_rate": 1.0945600630860563e-05, "loss": 2.3475, "step": 2225 }, { "epoch": 0.8648648648648649, "grad_norm": 0.6171875, "learning_rate": 1.0883950406651466e-05, "loss": 2.354, "step": 2226 }, { "epoch": 0.8652533935552803, "grad_norm": 0.65625, "learning_rate": 1.0822464299459345e-05, "loss": 2.3314, "step": 2227 }, { "epoch": 0.8656419222456958, "grad_norm": 0.58984375, "learning_rate": 1.0761142422517623e-05, "loss": 2.2504, "step": 2228 }, { "epoch": 0.8660304509361113, "grad_norm": 0.59765625, "learning_rate": 1.0699984888757308e-05, "loss": 2.3928, "step": 2229 }, { "epoch": 0.8664189796265268, "grad_norm": 0.62109375, "learning_rate": 1.063899181080672e-05, "loss": 2.345, "step": 2230 }, { "epoch": 0.8668075083169423, "grad_norm": 0.6015625, "learning_rate": 1.0578163300991328e-05, "loss": 2.3404, "step": 2231 }, { "epoch": 0.8671960370073578, "grad_norm": 0.609375, "learning_rate": 1.0517499471333536e-05, "loss": 2.3495, "step": 2232 }, { "epoch": 0.8675845656977732, "grad_norm": 0.61328125, "learning_rate": 1.0457000433552477e-05, "loss": 2.3391, "step": 2233 }, { "epoch": 0.8679730943881887, "grad_norm": 0.59375, "learning_rate": 1.0396666299063763e-05, "loss": 2.362, "step": 2234 }, { "epoch": 0.8683616230786042, "grad_norm": 0.60546875, "learning_rate": 1.0336497178979343e-05, "loss": 2.2885, "step": 2235 }, { "epoch": 0.8687501517690197, "grad_norm": 0.65234375, "learning_rate": 1.0276493184107305e-05, "loss": 2.3794, "step": 2236 }, { "epoch": 0.8691386804594352, "grad_norm": 0.6171875, "learning_rate": 1.0216654424951589e-05, "loss": 2.3564, "step": 2237 }, { "epoch": 0.8695272091498507, "grad_norm": 0.61328125, "learning_rate": 1.0156981011711875e-05, "loss": 2.3339, "step": 2238 }, { "epoch": 0.8699157378402661, "grad_norm": 0.7265625, "learning_rate": 1.0097473054283301e-05, "loss": 2.3127, "step": 2239 }, { "epoch": 0.8703042665306816, "grad_norm": 0.58984375, "learning_rate": 1.003813066225635e-05, "loss": 2.3678, "step": 2240 }, { "epoch": 0.8706927952210971, "grad_norm": 0.60546875, "learning_rate": 9.97895394491657e-06, "loss": 2.3859, "step": 2241 }, { "epoch": 0.8710813239115126, "grad_norm": 0.6015625, "learning_rate": 9.91994301124437e-06, "loss": 2.3713, "step": 2242 }, { "epoch": 0.8714698526019281, "grad_norm": 0.6171875, "learning_rate": 9.861097969914902e-06, "loss": 2.3683, "step": 2243 }, { "epoch": 0.8718583812923436, "grad_norm": 0.58203125, "learning_rate": 9.802418929297773e-06, "loss": 2.2977, "step": 2244 }, { "epoch": 0.872246909982759, "grad_norm": 0.5859375, "learning_rate": 9.743905997456926e-06, "loss": 2.3027, "step": 2245 }, { "epoch": 0.8726354386731745, "grad_norm": 0.5859375, "learning_rate": 9.68555928215037e-06, "loss": 2.3658, "step": 2246 }, { "epoch": 0.87302396736359, "grad_norm": 0.61328125, "learning_rate": 9.627378890829963e-06, "loss": 2.2962, "step": 2247 }, { "epoch": 0.8734124960540055, "grad_norm": 0.58203125, "learning_rate": 9.569364930641323e-06, "loss": 2.3248, "step": 2248 }, { "epoch": 0.873801024744421, "grad_norm": 0.59375, "learning_rate": 9.51151750842354e-06, "loss": 2.3288, "step": 2249 }, { "epoch": 0.8741895534348365, "grad_norm": 0.5859375, "learning_rate": 9.453836730709031e-06, "loss": 2.3732, "step": 2250 }, { "epoch": 0.8745780821252519, "grad_norm": 0.62109375, "learning_rate": 9.396322703723282e-06, "loss": 2.3707, "step": 2251 }, { "epoch": 0.8749666108156674, "grad_norm": 0.59765625, "learning_rate": 9.338975533384719e-06, "loss": 2.3222, "step": 2252 }, { "epoch": 0.8753551395060829, "grad_norm": 0.60546875, "learning_rate": 9.281795325304455e-06, "loss": 2.3055, "step": 2253 }, { "epoch": 0.8757436681964984, "grad_norm": 0.6484375, "learning_rate": 9.224782184786129e-06, "loss": 2.4232, "step": 2254 }, { "epoch": 0.8761321968869139, "grad_norm": 0.6328125, "learning_rate": 9.167936216825746e-06, "loss": 2.3638, "step": 2255 }, { "epoch": 0.8765207255773293, "grad_norm": 0.59375, "learning_rate": 9.111257526111394e-06, "loss": 2.4049, "step": 2256 }, { "epoch": 0.8769092542677448, "grad_norm": 0.60546875, "learning_rate": 9.054746217023125e-06, "loss": 2.3684, "step": 2257 }, { "epoch": 0.8772977829581603, "grad_norm": 0.62109375, "learning_rate": 8.998402393632754e-06, "loss": 2.3333, "step": 2258 }, { "epoch": 0.8776863116485758, "grad_norm": 0.609375, "learning_rate": 8.942226159703603e-06, "loss": 2.3723, "step": 2259 }, { "epoch": 0.8780748403389913, "grad_norm": 0.60546875, "learning_rate": 8.88621761869044e-06, "loss": 2.3631, "step": 2260 }, { "epoch": 0.8784633690294068, "grad_norm": 0.61328125, "learning_rate": 8.83037687373911e-06, "loss": 2.3523, "step": 2261 }, { "epoch": 0.8788518977198222, "grad_norm": 0.62109375, "learning_rate": 8.774704027686509e-06, "loss": 2.3548, "step": 2262 }, { "epoch": 0.8792404264102377, "grad_norm": 0.609375, "learning_rate": 8.719199183060322e-06, "loss": 2.3561, "step": 2263 }, { "epoch": 0.8796289551006532, "grad_norm": 0.6015625, "learning_rate": 8.663862442078819e-06, "loss": 2.3768, "step": 2264 }, { "epoch": 0.8800174837910687, "grad_norm": 0.62109375, "learning_rate": 8.608693906650711e-06, "loss": 2.4043, "step": 2265 }, { "epoch": 0.8804060124814842, "grad_norm": 0.6171875, "learning_rate": 8.55369367837493e-06, "loss": 2.3483, "step": 2266 }, { "epoch": 0.8807945411718997, "grad_norm": 0.63671875, "learning_rate": 8.498861858540451e-06, "loss": 2.3346, "step": 2267 }, { "epoch": 0.8811830698623151, "grad_norm": 0.6171875, "learning_rate": 8.444198548126104e-06, "loss": 2.3474, "step": 2268 }, { "epoch": 0.8815715985527306, "grad_norm": 0.58203125, "learning_rate": 8.389703847800402e-06, "loss": 2.3003, "step": 2269 }, { "epoch": 0.8819601272431461, "grad_norm": 0.59375, "learning_rate": 8.335377857921322e-06, "loss": 2.3599, "step": 2270 }, { "epoch": 0.8823486559335616, "grad_norm": 0.60546875, "learning_rate": 8.28122067853615e-06, "loss": 2.3675, "step": 2271 }, { "epoch": 0.8827371846239771, "grad_norm": 0.61328125, "learning_rate": 8.227232409381314e-06, "loss": 2.3494, "step": 2272 }, { "epoch": 0.8831257133143926, "grad_norm": 0.5859375, "learning_rate": 8.173413149882147e-06, "loss": 2.2979, "step": 2273 }, { "epoch": 0.883514242004808, "grad_norm": 0.61328125, "learning_rate": 8.119762999152747e-06, "loss": 2.3499, "step": 2274 }, { "epoch": 0.8839027706952235, "grad_norm": 0.6015625, "learning_rate": 8.066282055995766e-06, "loss": 2.2933, "step": 2275 }, { "epoch": 0.884291299385639, "grad_norm": 0.63671875, "learning_rate": 8.012970418902244e-06, "loss": 2.3571, "step": 2276 }, { "epoch": 0.8846798280760545, "grad_norm": 0.59765625, "learning_rate": 7.959828186051454e-06, "loss": 2.3032, "step": 2277 }, { "epoch": 0.88506835676647, "grad_norm": 0.62109375, "learning_rate": 7.906855455310647e-06, "loss": 2.3277, "step": 2278 }, { "epoch": 0.8854568854568855, "grad_norm": 0.59375, "learning_rate": 7.85405232423494e-06, "loss": 2.3656, "step": 2279 }, { "epoch": 0.8858454141473009, "grad_norm": 0.58203125, "learning_rate": 7.801418890067114e-06, "loss": 2.3602, "step": 2280 }, { "epoch": 0.8862339428377164, "grad_norm": 0.6171875, "learning_rate": 7.748955249737432e-06, "loss": 2.3514, "step": 2281 }, { "epoch": 0.8866224715281319, "grad_norm": 0.6171875, "learning_rate": 7.696661499863467e-06, "loss": 2.3756, "step": 2282 }, { "epoch": 0.8870110002185474, "grad_norm": 0.60546875, "learning_rate": 7.644537736749924e-06, "loss": 2.3808, "step": 2283 }, { "epoch": 0.8873995289089629, "grad_norm": 0.57421875, "learning_rate": 7.5925840563884405e-06, "loss": 2.3155, "step": 2284 }, { "epoch": 0.8877880575993784, "grad_norm": 0.62109375, "learning_rate": 7.54080055445745e-06, "loss": 2.3637, "step": 2285 }, { "epoch": 0.8881765862897938, "grad_norm": 0.6328125, "learning_rate": 7.489187326321978e-06, "loss": 2.2882, "step": 2286 }, { "epoch": 0.8885651149802093, "grad_norm": 0.59765625, "learning_rate": 7.437744467033436e-06, "loss": 2.3655, "step": 2287 }, { "epoch": 0.8889536436706248, "grad_norm": 0.59765625, "learning_rate": 7.386472071329542e-06, "loss": 2.4046, "step": 2288 }, { "epoch": 0.8893421723610403, "grad_norm": 0.64453125, "learning_rate": 7.335370233634042e-06, "loss": 2.31, "step": 2289 }, { "epoch": 0.8897307010514558, "grad_norm": 0.62890625, "learning_rate": 7.2844390480566126e-06, "loss": 2.4038, "step": 2290 }, { "epoch": 0.8901192297418713, "grad_norm": 0.60546875, "learning_rate": 7.2336786083926245e-06, "loss": 2.3153, "step": 2291 }, { "epoch": 0.8905077584322867, "grad_norm": 0.59765625, "learning_rate": 7.183089008123012e-06, "loss": 2.2857, "step": 2292 }, { "epoch": 0.8908962871227022, "grad_norm": 0.609375, "learning_rate": 7.132670340414105e-06, "loss": 2.3884, "step": 2293 }, { "epoch": 0.8912848158131177, "grad_norm": 0.59765625, "learning_rate": 7.08242269811743e-06, "loss": 2.2858, "step": 2294 }, { "epoch": 0.8916733445035332, "grad_norm": 0.6171875, "learning_rate": 7.032346173769544e-06, "loss": 2.345, "step": 2295 }, { "epoch": 0.8920618731939487, "grad_norm": 0.609375, "learning_rate": 6.98244085959191e-06, "loss": 2.3408, "step": 2296 }, { "epoch": 0.8924504018843642, "grad_norm": 0.62890625, "learning_rate": 6.932706847490622e-06, "loss": 2.2851, "step": 2297 }, { "epoch": 0.8928389305747796, "grad_norm": 0.58203125, "learning_rate": 6.8831442290563485e-06, "loss": 2.3528, "step": 2298 }, { "epoch": 0.8932274592651951, "grad_norm": 0.60546875, "learning_rate": 6.833753095564122e-06, "loss": 2.3832, "step": 2299 }, { "epoch": 0.8936159879556106, "grad_norm": 0.58984375, "learning_rate": 6.784533537973137e-06, "loss": 2.3626, "step": 2300 }, { "epoch": 0.8940045166460261, "grad_norm": 0.61328125, "learning_rate": 6.735485646926631e-06, "loss": 2.3842, "step": 2301 }, { "epoch": 0.8943930453364416, "grad_norm": 0.6171875, "learning_rate": 6.686609512751696e-06, "loss": 2.3222, "step": 2302 }, { "epoch": 0.8947815740268571, "grad_norm": 0.66796875, "learning_rate": 6.637905225459129e-06, "loss": 2.4044, "step": 2303 }, { "epoch": 0.8951701027172725, "grad_norm": 0.62109375, "learning_rate": 6.589372874743227e-06, "loss": 2.348, "step": 2304 }, { "epoch": 0.895558631407688, "grad_norm": 0.5859375, "learning_rate": 6.541012549981651e-06, "loss": 2.3778, "step": 2305 }, { "epoch": 0.8959471600981035, "grad_norm": 0.609375, "learning_rate": 6.492824340235271e-06, "loss": 2.3016, "step": 2306 }, { "epoch": 0.896335688788519, "grad_norm": 0.58203125, "learning_rate": 6.444808334247965e-06, "loss": 2.3245, "step": 2307 }, { "epoch": 0.8967242174789345, "grad_norm": 0.62890625, "learning_rate": 6.396964620446522e-06, "loss": 2.4092, "step": 2308 }, { "epoch": 0.89711274616935, "grad_norm": 0.58984375, "learning_rate": 6.3492932869403835e-06, "loss": 2.3346, "step": 2309 }, { "epoch": 0.8975012748597654, "grad_norm": 0.5859375, "learning_rate": 6.301794421521568e-06, "loss": 2.3933, "step": 2310 }, { "epoch": 0.8978898035501809, "grad_norm": 0.57421875, "learning_rate": 6.25446811166448e-06, "loss": 2.4146, "step": 2311 }, { "epoch": 0.8982783322405964, "grad_norm": 0.58203125, "learning_rate": 6.207314444525703e-06, "loss": 2.349, "step": 2312 }, { "epoch": 0.8986668609310119, "grad_norm": 0.6171875, "learning_rate": 6.160333506943938e-06, "loss": 2.3358, "step": 2313 }, { "epoch": 0.8990553896214274, "grad_norm": 0.60546875, "learning_rate": 6.113525385439733e-06, "loss": 2.3736, "step": 2314 }, { "epoch": 0.8994439183118429, "grad_norm": 0.6171875, "learning_rate": 6.066890166215389e-06, "loss": 2.3436, "step": 2315 }, { "epoch": 0.8998324470022583, "grad_norm": 0.671875, "learning_rate": 6.020427935154816e-06, "loss": 2.3194, "step": 2316 }, { "epoch": 0.9002209756926738, "grad_norm": 0.640625, "learning_rate": 5.974138777823312e-06, "loss": 2.4071, "step": 2317 }, { "epoch": 0.9006095043830893, "grad_norm": 0.59375, "learning_rate": 5.928022779467468e-06, "loss": 2.3346, "step": 2318 }, { "epoch": 0.9009980330735048, "grad_norm": 0.62109375, "learning_rate": 5.88208002501498e-06, "loss": 2.2619, "step": 2319 }, { "epoch": 0.9013865617639203, "grad_norm": 0.64453125, "learning_rate": 5.836310599074468e-06, "loss": 2.3823, "step": 2320 }, { "epoch": 0.9017750904543358, "grad_norm": 0.6640625, "learning_rate": 5.790714585935397e-06, "loss": 2.3954, "step": 2321 }, { "epoch": 0.9021636191447512, "grad_norm": 0.59765625, "learning_rate": 5.745292069567809e-06, "loss": 2.366, "step": 2322 }, { "epoch": 0.9025521478351667, "grad_norm": 0.6015625, "learning_rate": 5.700043133622291e-06, "loss": 2.4269, "step": 2323 }, { "epoch": 0.9029406765255822, "grad_norm": 0.6015625, "learning_rate": 5.654967861429738e-06, "loss": 2.3784, "step": 2324 }, { "epoch": 0.9033292052159977, "grad_norm": 0.6171875, "learning_rate": 5.61006633600123e-06, "loss": 2.3596, "step": 2325 }, { "epoch": 0.9037177339064132, "grad_norm": 0.66015625, "learning_rate": 5.565338640027862e-06, "loss": 2.323, "step": 2326 }, { "epoch": 0.9041062625968286, "grad_norm": 0.640625, "learning_rate": 5.520784855880612e-06, "loss": 2.3243, "step": 2327 }, { "epoch": 0.9044947912872441, "grad_norm": 0.59765625, "learning_rate": 5.4764050656101795e-06, "loss": 2.3683, "step": 2328 }, { "epoch": 0.9048833199776596, "grad_norm": 0.58984375, "learning_rate": 5.432199350946832e-06, "loss": 2.3238, "step": 2329 }, { "epoch": 0.9052718486680751, "grad_norm": 0.62109375, "learning_rate": 5.388167793300281e-06, "loss": 2.4312, "step": 2330 }, { "epoch": 0.9056603773584906, "grad_norm": 0.60546875, "learning_rate": 5.344310473759462e-06, "loss": 2.32, "step": 2331 }, { "epoch": 0.9060489060489061, "grad_norm": 0.60546875, "learning_rate": 5.300627473092457e-06, "loss": 2.3038, "step": 2332 }, { "epoch": 0.9064374347393215, "grad_norm": 0.63671875, "learning_rate": 5.257118871746347e-06, "loss": 2.3765, "step": 2333 }, { "epoch": 0.906825963429737, "grad_norm": 0.61328125, "learning_rate": 5.213784749846984e-06, "loss": 2.324, "step": 2334 }, { "epoch": 0.9072144921201525, "grad_norm": 0.59765625, "learning_rate": 5.170625187198941e-06, "loss": 2.3849, "step": 2335 }, { "epoch": 0.907603020810568, "grad_norm": 0.578125, "learning_rate": 5.127640263285294e-06, "loss": 2.2514, "step": 2336 }, { "epoch": 0.9079915495009835, "grad_norm": 0.58203125, "learning_rate": 5.084830057267509e-06, "loss": 2.3077, "step": 2337 }, { "epoch": 0.908380078191399, "grad_norm": 0.5859375, "learning_rate": 5.04219464798531e-06, "loss": 2.3745, "step": 2338 }, { "epoch": 0.9087686068818144, "grad_norm": 0.6171875, "learning_rate": 4.9997341139565e-06, "loss": 2.35, "step": 2339 }, { "epoch": 0.9091571355722299, "grad_norm": 0.5859375, "learning_rate": 4.957448533376819e-06, "loss": 2.3007, "step": 2340 }, { "epoch": 0.9095456642626454, "grad_norm": 0.609375, "learning_rate": 4.915337984119805e-06, "loss": 2.3123, "step": 2341 }, { "epoch": 0.9099341929530609, "grad_norm": 0.578125, "learning_rate": 4.8734025437366826e-06, "loss": 2.406, "step": 2342 }, { "epoch": 0.9103227216434764, "grad_norm": 0.59375, "learning_rate": 4.831642289456184e-06, "loss": 2.3487, "step": 2343 }, { "epoch": 0.9107112503338919, "grad_norm": 0.6015625, "learning_rate": 4.790057298184425e-06, "loss": 2.3409, "step": 2344 }, { "epoch": 0.9110997790243073, "grad_norm": 0.6171875, "learning_rate": 4.748647646504722e-06, "loss": 2.3852, "step": 2345 }, { "epoch": 0.9114883077147228, "grad_norm": 0.6171875, "learning_rate": 4.70741341067753e-06, "loss": 2.3947, "step": 2346 }, { "epoch": 0.9118768364051383, "grad_norm": 0.640625, "learning_rate": 4.666354666640216e-06, "loss": 2.4049, "step": 2347 }, { "epoch": 0.9122653650955538, "grad_norm": 0.6015625, "learning_rate": 4.625471490007005e-06, "loss": 2.3076, "step": 2348 }, { "epoch": 0.9126538937859693, "grad_norm": 0.62890625, "learning_rate": 4.584763956068738e-06, "loss": 2.3434, "step": 2349 }, { "epoch": 0.9130424224763848, "grad_norm": 0.6328125, "learning_rate": 4.544232139792826e-06, "loss": 2.325, "step": 2350 }, { "epoch": 0.9134309511668002, "grad_norm": 0.6484375, "learning_rate": 4.503876115823081e-06, "loss": 2.4135, "step": 2351 }, { "epoch": 0.9138194798572157, "grad_norm": 0.640625, "learning_rate": 4.463695958479563e-06, "loss": 2.4263, "step": 2352 }, { "epoch": 0.9142080085476312, "grad_norm": 0.6171875, "learning_rate": 4.423691741758451e-06, "loss": 2.3761, "step": 2353 }, { "epoch": 0.9145965372380467, "grad_norm": 0.60546875, "learning_rate": 4.383863539331923e-06, "loss": 2.3851, "step": 2354 }, { "epoch": 0.9149850659284622, "grad_norm": 0.5703125, "learning_rate": 4.3442114245479835e-06, "loss": 2.2704, "step": 2355 }, { "epoch": 0.9153735946188777, "grad_norm": 0.640625, "learning_rate": 4.304735470430387e-06, "loss": 2.3783, "step": 2356 }, { "epoch": 0.9157621233092931, "grad_norm": 0.6171875, "learning_rate": 4.265435749678448e-06, "loss": 2.4151, "step": 2357 }, { "epoch": 0.9161506519997086, "grad_norm": 0.5859375, "learning_rate": 4.226312334666904e-06, "loss": 2.2984, "step": 2358 }, { "epoch": 0.9165391806901241, "grad_norm": 0.58203125, "learning_rate": 4.187365297445844e-06, "loss": 2.3461, "step": 2359 }, { "epoch": 0.9169277093805396, "grad_norm": 0.609375, "learning_rate": 4.148594709740539e-06, "loss": 2.3782, "step": 2360 }, { "epoch": 0.9173162380709551, "grad_norm": 0.6015625, "learning_rate": 4.110000642951273e-06, "loss": 2.3375, "step": 2361 }, { "epoch": 0.9177047667613706, "grad_norm": 0.58203125, "learning_rate": 4.071583168153293e-06, "loss": 2.3792, "step": 2362 }, { "epoch": 0.918093295451786, "grad_norm": 0.6015625, "learning_rate": 4.033342356096592e-06, "loss": 2.3389, "step": 2363 }, { "epoch": 0.9184818241422015, "grad_norm": 0.62890625, "learning_rate": 3.995278277205839e-06, "loss": 2.4245, "step": 2364 }, { "epoch": 0.918870352832617, "grad_norm": 0.6015625, "learning_rate": 3.957391001580235e-06, "loss": 2.2686, "step": 2365 }, { "epoch": 0.9192588815230325, "grad_norm": 0.60546875, "learning_rate": 3.919680598993347e-06, "loss": 2.3117, "step": 2366 }, { "epoch": 0.919647410213448, "grad_norm": 0.58984375, "learning_rate": 3.882147138893055e-06, "loss": 2.3161, "step": 2367 }, { "epoch": 0.9200359389038635, "grad_norm": 0.62890625, "learning_rate": 3.844790690401356e-06, "loss": 2.3478, "step": 2368 }, { "epoch": 0.9204244675942789, "grad_norm": 0.6484375, "learning_rate": 3.807611322314242e-06, "loss": 2.303, "step": 2369 }, { "epoch": 0.9208129962846944, "grad_norm": 0.63671875, "learning_rate": 3.770609103101641e-06, "loss": 2.4052, "step": 2370 }, { "epoch": 0.9212015249751099, "grad_norm": 0.62890625, "learning_rate": 3.7337841009072007e-06, "loss": 2.3042, "step": 2371 }, { "epoch": 0.9215900536655254, "grad_norm": 0.62109375, "learning_rate": 3.6971363835482163e-06, "loss": 2.3081, "step": 2372 }, { "epoch": 0.9219785823559409, "grad_norm": 0.58984375, "learning_rate": 3.660666018515491e-06, "loss": 2.3679, "step": 2373 }, { "epoch": 0.9223671110463564, "grad_norm": 0.61328125, "learning_rate": 3.624373072973242e-06, "loss": 2.2973, "step": 2374 }, { "epoch": 0.9227556397367718, "grad_norm": 0.625, "learning_rate": 3.588257613758883e-06, "loss": 2.4296, "step": 2375 }, { "epoch": 0.9231441684271873, "grad_norm": 0.60546875, "learning_rate": 3.5523197073830337e-06, "loss": 2.2725, "step": 2376 }, { "epoch": 0.9235326971176028, "grad_norm": 0.5859375, "learning_rate": 3.5165594200293193e-06, "loss": 2.3307, "step": 2377 }, { "epoch": 0.9239212258080183, "grad_norm": 0.625, "learning_rate": 3.4809768175542046e-06, "loss": 2.3393, "step": 2378 }, { "epoch": 0.9243097544984338, "grad_norm": 0.59765625, "learning_rate": 3.4455719654870045e-06, "loss": 2.3512, "step": 2379 }, { "epoch": 0.9246982831888493, "grad_norm": 0.58984375, "learning_rate": 3.4103449290296297e-06, "loss": 2.3553, "step": 2380 }, { "epoch": 0.9250868118792647, "grad_norm": 0.59375, "learning_rate": 3.375295773056564e-06, "loss": 2.3058, "step": 2381 }, { "epoch": 0.9254753405696802, "grad_norm": 0.609375, "learning_rate": 3.3404245621146855e-06, "loss": 2.3938, "step": 2382 }, { "epoch": 0.9258638692600957, "grad_norm": 0.63671875, "learning_rate": 3.305731360423159e-06, "loss": 2.3489, "step": 2383 }, { "epoch": 0.9262523979505112, "grad_norm": 0.58984375, "learning_rate": 3.271216231873353e-06, "loss": 2.3626, "step": 2384 }, { "epoch": 0.9266409266409267, "grad_norm": 0.5703125, "learning_rate": 3.2368792400286453e-06, "loss": 2.3769, "step": 2385 }, { "epoch": 0.9270294553313422, "grad_norm": 0.61328125, "learning_rate": 3.202720448124408e-06, "loss": 2.2995, "step": 2386 }, { "epoch": 0.9274179840217576, "grad_norm": 0.6015625, "learning_rate": 3.16873991906782e-06, "loss": 2.3558, "step": 2387 }, { "epoch": 0.9278065127121731, "grad_norm": 0.56640625, "learning_rate": 3.134937715437758e-06, "loss": 2.2806, "step": 2388 }, { "epoch": 0.9281950414025886, "grad_norm": 0.62109375, "learning_rate": 3.1013138994847036e-06, "loss": 2.3126, "step": 2389 }, { "epoch": 0.9285835700930041, "grad_norm": 0.59765625, "learning_rate": 3.0678685331306133e-06, "loss": 2.3287, "step": 2390 }, { "epoch": 0.9289720987834196, "grad_norm": 0.62890625, "learning_rate": 3.034601677968818e-06, "loss": 2.3597, "step": 2391 }, { "epoch": 0.929360627473835, "grad_norm": 0.60546875, "learning_rate": 3.0015133952638994e-06, "loss": 2.3254, "step": 2392 }, { "epoch": 0.9297491561642505, "grad_norm": 0.65234375, "learning_rate": 2.96860374595157e-06, "loss": 2.3784, "step": 2393 }, { "epoch": 0.930137684854666, "grad_norm": 0.62109375, "learning_rate": 2.9358727906385607e-06, "loss": 2.3332, "step": 2394 }, { "epoch": 0.9305262135450815, "grad_norm": 0.609375, "learning_rate": 2.9033205896025316e-06, "loss": 2.338, "step": 2395 }, { "epoch": 0.930914742235497, "grad_norm": 0.61328125, "learning_rate": 2.8709472027919405e-06, "loss": 2.3347, "step": 2396 }, { "epoch": 0.9313032709259125, "grad_norm": 0.609375, "learning_rate": 2.838752689825963e-06, "loss": 2.3698, "step": 2397 }, { "epoch": 0.9316917996163279, "grad_norm": 0.578125, "learning_rate": 2.8067371099943286e-06, "loss": 2.4252, "step": 2398 }, { "epoch": 0.9320803283067434, "grad_norm": 0.62109375, "learning_rate": 2.774900522257251e-06, "loss": 2.3157, "step": 2399 }, { "epoch": 0.9324688569971589, "grad_norm": 0.6484375, "learning_rate": 2.74324298524532e-06, "loss": 2.341, "step": 2400 }, { "epoch": 0.9328573856875744, "grad_norm": 0.578125, "learning_rate": 2.7117645572593777e-06, "loss": 2.3027, "step": 2401 }, { "epoch": 0.9332459143779899, "grad_norm": 0.625, "learning_rate": 2.6804652962703847e-06, "loss": 2.4317, "step": 2402 }, { "epoch": 0.9336344430684054, "grad_norm": 0.60546875, "learning_rate": 2.6493452599194112e-06, "loss": 2.4147, "step": 2403 }, { "epoch": 0.9340229717588208, "grad_norm": 0.6171875, "learning_rate": 2.6184045055174024e-06, "loss": 2.3043, "step": 2404 }, { "epoch": 0.9344115004492363, "grad_norm": 0.609375, "learning_rate": 2.5876430900451886e-06, "loss": 2.3907, "step": 2405 }, { "epoch": 0.9348000291396518, "grad_norm": 0.640625, "learning_rate": 2.5570610701532995e-06, "loss": 2.3827, "step": 2406 }, { "epoch": 0.9351885578300673, "grad_norm": 0.5859375, "learning_rate": 2.5266585021618717e-06, "loss": 2.4008, "step": 2407 }, { "epoch": 0.9355770865204828, "grad_norm": 0.6015625, "learning_rate": 2.4964354420606073e-06, "loss": 2.375, "step": 2408 }, { "epoch": 0.9359656152108983, "grad_norm": 0.640625, "learning_rate": 2.4663919455085727e-06, "loss": 2.3972, "step": 2409 }, { "epoch": 0.9363541439013137, "grad_norm": 0.6015625, "learning_rate": 2.4365280678342094e-06, "loss": 2.3483, "step": 2410 }, { "epoch": 0.9367426725917292, "grad_norm": 0.625, "learning_rate": 2.406843864035102e-06, "loss": 2.3772, "step": 2411 }, { "epoch": 0.9371312012821447, "grad_norm": 0.609375, "learning_rate": 2.377339388777999e-06, "loss": 2.3935, "step": 2412 }, { "epoch": 0.9375197299725602, "grad_norm": 0.61328125, "learning_rate": 2.348014696398626e-06, "loss": 2.3582, "step": 2413 }, { "epoch": 0.9379082586629757, "grad_norm": 0.625, "learning_rate": 2.3188698409016494e-06, "loss": 2.4331, "step": 2414 }, { "epoch": 0.9382967873533912, "grad_norm": 0.5859375, "learning_rate": 2.2899048759605136e-06, "loss": 2.3858, "step": 2415 }, { "epoch": 0.9386853160438066, "grad_norm": 0.61328125, "learning_rate": 2.261119854917404e-06, "loss": 2.3011, "step": 2416 }, { "epoch": 0.9390738447342221, "grad_norm": 0.60546875, "learning_rate": 2.2325148307831057e-06, "loss": 2.4114, "step": 2417 }, { "epoch": 0.9394623734246376, "grad_norm": 0.65234375, "learning_rate": 2.204089856236913e-06, "loss": 2.4188, "step": 2418 }, { "epoch": 0.9398509021150531, "grad_norm": 0.6015625, "learning_rate": 2.1758449836265403e-06, "loss": 2.2944, "step": 2419 }, { "epoch": 0.9402394308054686, "grad_norm": 0.6015625, "learning_rate": 2.147780264968047e-06, "loss": 2.3652, "step": 2420 }, { "epoch": 0.9406279594958841, "grad_norm": 0.62890625, "learning_rate": 2.1198957519456887e-06, "loss": 2.3548, "step": 2421 }, { "epoch": 0.9410164881862995, "grad_norm": 0.6171875, "learning_rate": 2.092191495911877e-06, "loss": 2.3325, "step": 2422 }, { "epoch": 0.941405016876715, "grad_norm": 0.60546875, "learning_rate": 2.0646675478870337e-06, "loss": 2.3293, "step": 2423 }, { "epoch": 0.9417935455671305, "grad_norm": 0.60546875, "learning_rate": 2.037323958559545e-06, "loss": 2.4153, "step": 2424 }, { "epoch": 0.942182074257546, "grad_norm": 0.6015625, "learning_rate": 2.0101607782856526e-06, "loss": 2.306, "step": 2425 }, { "epoch": 0.9425706029479615, "grad_norm": 0.6015625, "learning_rate": 1.983178057089341e-06, "loss": 2.3611, "step": 2426 }, { "epoch": 0.942959131638377, "grad_norm": 0.64453125, "learning_rate": 1.9563758446622502e-06, "loss": 2.3424, "step": 2427 }, { "epoch": 0.9433476603287924, "grad_norm": 0.61328125, "learning_rate": 1.9297541903636196e-06, "loss": 2.3627, "step": 2428 }, { "epoch": 0.9437361890192079, "grad_norm": 0.62890625, "learning_rate": 1.9033131432201424e-06, "loss": 2.349, "step": 2429 }, { "epoch": 0.9441247177096234, "grad_norm": 0.6484375, "learning_rate": 1.877052751925934e-06, "loss": 2.3583, "step": 2430 }, { "epoch": 0.9445132464000389, "grad_norm": 0.66796875, "learning_rate": 1.8509730648423762e-06, "loss": 2.2995, "step": 2431 }, { "epoch": 0.9449017750904544, "grad_norm": 0.63671875, "learning_rate": 1.8250741299980945e-06, "loss": 2.331, "step": 2432 }, { "epoch": 0.9452903037808699, "grad_norm": 0.6015625, "learning_rate": 1.7993559950888362e-06, "loss": 2.3712, "step": 2433 }, { "epoch": 0.9456788324712853, "grad_norm": 0.59765625, "learning_rate": 1.7738187074773705e-06, "loss": 2.3326, "step": 2434 }, { "epoch": 0.9460673611617008, "grad_norm": 0.6171875, "learning_rate": 1.748462314193422e-06, "loss": 2.3624, "step": 2435 }, { "epoch": 0.9464558898521163, "grad_norm": 0.6015625, "learning_rate": 1.723286861933593e-06, "loss": 2.4015, "step": 2436 }, { "epoch": 0.9468444185425318, "grad_norm": 0.5859375, "learning_rate": 1.6982923970612409e-06, "loss": 2.3807, "step": 2437 }, { "epoch": 0.9472329472329473, "grad_norm": 0.625, "learning_rate": 1.673478965606423e-06, "loss": 2.3904, "step": 2438 }, { "epoch": 0.9476214759233628, "grad_norm": 0.609375, "learning_rate": 1.6488466132658087e-06, "loss": 2.327, "step": 2439 }, { "epoch": 0.9480100046137782, "grad_norm": 0.578125, "learning_rate": 1.6243953854025773e-06, "loss": 2.3492, "step": 2440 }, { "epoch": 0.9483985333041937, "grad_norm": 0.609375, "learning_rate": 1.6001253270463757e-06, "loss": 2.3737, "step": 2441 }, { "epoch": 0.9487870619946092, "grad_norm": 0.61328125, "learning_rate": 1.5760364828931728e-06, "loss": 2.3146, "step": 2442 }, { "epoch": 0.9491755906850247, "grad_norm": 0.59375, "learning_rate": 1.5521288973052273e-06, "loss": 2.3018, "step": 2443 }, { "epoch": 0.9495641193754402, "grad_norm": 0.5859375, "learning_rate": 1.5284026143110087e-06, "loss": 2.3319, "step": 2444 }, { "epoch": 0.9499526480658557, "grad_norm": 0.58984375, "learning_rate": 1.504857677605065e-06, "loss": 2.3617, "step": 2445 }, { "epoch": 0.950341176756271, "grad_norm": 0.59375, "learning_rate": 1.481494130547989e-06, "loss": 2.3314, "step": 2446 }, { "epoch": 0.9507297054466866, "grad_norm": 0.58203125, "learning_rate": 1.45831201616633e-06, "loss": 2.3293, "step": 2447 }, { "epoch": 0.9511182341371021, "grad_norm": 0.58984375, "learning_rate": 1.435311377152493e-06, "loss": 2.3415, "step": 2448 }, { "epoch": 0.9515067628275176, "grad_norm": 0.5859375, "learning_rate": 1.4124922558646946e-06, "loss": 2.3067, "step": 2449 }, { "epoch": 0.9518952915179331, "grad_norm": 0.61328125, "learning_rate": 1.3898546943268643e-06, "loss": 2.4032, "step": 2450 }, { "epoch": 0.9522838202083486, "grad_norm": 0.65625, "learning_rate": 1.3673987342285533e-06, "loss": 2.3676, "step": 2451 }, { "epoch": 0.952672348898764, "grad_norm": 0.6015625, "learning_rate": 1.3451244169249033e-06, "loss": 2.3903, "step": 2452 }, { "epoch": 0.9530608775891795, "grad_norm": 0.6328125, "learning_rate": 1.3230317834365013e-06, "loss": 2.367, "step": 2453 }, { "epoch": 0.953449406279595, "grad_norm": 0.6015625, "learning_rate": 1.3011208744493796e-06, "loss": 2.3764, "step": 2454 }, { "epoch": 0.9538379349700105, "grad_norm": 0.66796875, "learning_rate": 1.2793917303148716e-06, "loss": 2.3566, "step": 2455 }, { "epoch": 0.954226463660426, "grad_norm": 0.78125, "learning_rate": 1.2578443910496008e-06, "loss": 2.4127, "step": 2456 }, { "epoch": 0.9546149923508415, "grad_norm": 0.69140625, "learning_rate": 1.236478896335358e-06, "loss": 2.3669, "step": 2457 }, { "epoch": 0.9550035210412569, "grad_norm": 0.609375, "learning_rate": 1.2152952855190692e-06, "loss": 2.311, "step": 2458 }, { "epoch": 0.9553920497316724, "grad_norm": 0.625, "learning_rate": 1.1942935976126724e-06, "loss": 2.4434, "step": 2459 }, { "epoch": 0.9557805784220879, "grad_norm": 0.58984375, "learning_rate": 1.1734738712930849e-06, "loss": 2.3608, "step": 2460 }, { "epoch": 0.9561691071125034, "grad_norm": 0.625, "learning_rate": 1.1528361449021475e-06, "loss": 2.3522, "step": 2461 }, { "epoch": 0.9565576358029189, "grad_norm": 0.5703125, "learning_rate": 1.1323804564464802e-06, "loss": 2.3984, "step": 2462 }, { "epoch": 0.9569461644933343, "grad_norm": 0.6328125, "learning_rate": 1.1121068435974935e-06, "loss": 2.3543, "step": 2463 }, { "epoch": 0.9573346931837498, "grad_norm": 0.60546875, "learning_rate": 1.0920153436912883e-06, "loss": 2.2961, "step": 2464 }, { "epoch": 0.9577232218741653, "grad_norm": 0.62109375, "learning_rate": 1.072105993728556e-06, "loss": 2.3472, "step": 2465 }, { "epoch": 0.9581117505645808, "grad_norm": 0.61328125, "learning_rate": 1.0523788303745674e-06, "loss": 2.3973, "step": 2466 }, { "epoch": 0.9585002792549963, "grad_norm": 0.60546875, "learning_rate": 1.0328338899590616e-06, "loss": 2.3392, "step": 2467 }, { "epoch": 0.9588888079454118, "grad_norm": 0.65234375, "learning_rate": 1.0134712084762022e-06, "loss": 2.3869, "step": 2468 }, { "epoch": 0.9592773366358271, "grad_norm": 0.6171875, "learning_rate": 9.94290821584498e-07, "loss": 2.303, "step": 2469 }, { "epoch": 0.9596658653262427, "grad_norm": 0.58203125, "learning_rate": 9.752927646067388e-07, "loss": 2.3608, "step": 2470 }, { "epoch": 0.9600543940166582, "grad_norm": 0.65625, "learning_rate": 9.564770725299376e-07, "loss": 2.3699, "step": 2471 }, { "epoch": 0.9604429227070737, "grad_norm": 0.609375, "learning_rate": 9.378437800052764e-07, "loss": 2.3729, "step": 2472 }, { "epoch": 0.9608314513974892, "grad_norm": 0.58203125, "learning_rate": 9.193929213480057e-07, "loss": 2.3335, "step": 2473 }, { "epoch": 0.9612199800879047, "grad_norm": 0.6171875, "learning_rate": 9.011245305374006e-07, "loss": 2.3212, "step": 2474 }, { "epoch": 0.96160850877832, "grad_norm": 0.5859375, "learning_rate": 8.83038641216738e-07, "loss": 2.3239, "step": 2475 }, { "epoch": 0.9619970374687355, "grad_norm": 0.609375, "learning_rate": 8.651352866931639e-07, "loss": 2.2481, "step": 2476 }, { "epoch": 0.962385566159151, "grad_norm": 0.6484375, "learning_rate": 8.47414499937671e-07, "loss": 2.3728, "step": 2477 }, { "epoch": 0.9627740948495666, "grad_norm": 0.60546875, "learning_rate": 8.298763135850429e-07, "loss": 2.3857, "step": 2478 }, { "epoch": 0.963162623539982, "grad_norm": 0.59765625, "learning_rate": 8.125207599337769e-07, "loss": 2.293, "step": 2479 }, { "epoch": 0.9635511522303976, "grad_norm": 0.5859375, "learning_rate": 7.953478709460394e-07, "loss": 2.347, "step": 2480 }, { "epoch": 0.963939680920813, "grad_norm": 0.62890625, "learning_rate": 7.783576782476099e-07, "loss": 2.3957, "step": 2481 }, { "epoch": 0.9643282096112284, "grad_norm": 0.625, "learning_rate": 7.615502131277819e-07, "loss": 2.3651, "step": 2482 }, { "epoch": 0.964716738301644, "grad_norm": 0.5859375, "learning_rate": 7.449255065393624e-07, "loss": 2.2837, "step": 2483 }, { "epoch": 0.9651052669920595, "grad_norm": 0.59765625, "learning_rate": 7.284835890985608e-07, "loss": 2.3985, "step": 2484 }, { "epoch": 0.965493795682475, "grad_norm": 0.61328125, "learning_rate": 7.122244910850006e-07, "loss": 2.3838, "step": 2485 }, { "epoch": 0.9658823243728905, "grad_norm": 0.6484375, "learning_rate": 6.961482424415855e-07, "loss": 2.3677, "step": 2486 }, { "epoch": 0.9662708530633058, "grad_norm": 0.60546875, "learning_rate": 6.802548727745106e-07, "loss": 2.4086, "step": 2487 }, { "epoch": 0.9666593817537213, "grad_norm": 0.6015625, "learning_rate": 6.645444113531519e-07, "loss": 2.3239, "step": 2488 }, { "epoch": 0.9670479104441368, "grad_norm": 0.58984375, "learning_rate": 6.490168871100766e-07, "loss": 2.3253, "step": 2489 }, { "epoch": 0.9674364391345524, "grad_norm": 0.60546875, "learning_rate": 6.336723286408996e-07, "loss": 2.3663, "step": 2490 }, { "epoch": 0.9678249678249679, "grad_norm": 0.6171875, "learning_rate": 6.185107642043275e-07, "loss": 2.3496, "step": 2491 }, { "epoch": 0.9682134965153834, "grad_norm": 0.59765625, "learning_rate": 6.035322217220584e-07, "loss": 2.377, "step": 2492 }, { "epoch": 0.9686020252057987, "grad_norm": 0.62890625, "learning_rate": 5.88736728778727e-07, "loss": 2.3933, "step": 2493 }, { "epoch": 0.9689905538962142, "grad_norm": 0.625, "learning_rate": 5.741243126218487e-07, "loss": 2.3243, "step": 2494 }, { "epoch": 0.9693790825866297, "grad_norm": 0.60546875, "learning_rate": 5.596950001618085e-07, "loss": 2.3747, "step": 2495 }, { "epoch": 0.9697676112770452, "grad_norm": 0.61328125, "learning_rate": 5.45448817971772e-07, "loss": 2.3768, "step": 2496 }, { "epoch": 0.9701561399674608, "grad_norm": 0.5859375, "learning_rate": 5.313857922876419e-07, "loss": 2.4069, "step": 2497 }, { "epoch": 0.9705446686578763, "grad_norm": 0.59765625, "learning_rate": 5.17505949008057e-07, "loss": 2.3018, "step": 2498 }, { "epoch": 0.9709331973482916, "grad_norm": 0.59765625, "learning_rate": 5.038093136942367e-07, "loss": 2.3644, "step": 2499 }, { "epoch": 0.9713217260387071, "grad_norm": 0.61328125, "learning_rate": 4.902959115700712e-07, "loss": 2.325, "step": 2500 }, { "epoch": 0.9717102547291226, "grad_norm": 0.625, "learning_rate": 4.769657675219752e-07, "loss": 2.4057, "step": 2501 }, { "epoch": 0.9720987834195381, "grad_norm": 0.59765625, "learning_rate": 4.6381890609886736e-07, "loss": 2.3302, "step": 2502 }, { "epoch": 0.9724873121099536, "grad_norm": 0.6015625, "learning_rate": 4.508553515121472e-07, "loss": 2.367, "step": 2503 }, { "epoch": 0.9728758408003692, "grad_norm": 0.62890625, "learning_rate": 4.380751276356176e-07, "loss": 2.4321, "step": 2504 }, { "epoch": 0.9732643694907845, "grad_norm": 0.61328125, "learning_rate": 4.25478258005485e-07, "loss": 2.4378, "step": 2505 }, { "epoch": 0.9736528981812, "grad_norm": 0.6015625, "learning_rate": 4.130647658202591e-07, "loss": 2.2761, "step": 2506 }, { "epoch": 0.9740414268716155, "grad_norm": 0.640625, "learning_rate": 4.008346739407642e-07, "loss": 2.3617, "step": 2507 }, { "epoch": 0.974429955562031, "grad_norm": 0.65625, "learning_rate": 3.887880048900394e-07, "loss": 2.3557, "step": 2508 }, { "epoch": 0.9748184842524465, "grad_norm": 0.5859375, "learning_rate": 3.7692478085337155e-07, "loss": 2.3727, "step": 2509 }, { "epoch": 0.975207012942862, "grad_norm": 0.58984375, "learning_rate": 3.652450236781957e-07, "loss": 2.3027, "step": 2510 }, { "epoch": 0.9755955416332774, "grad_norm": 0.61328125, "learning_rate": 3.5374875487405037e-07, "loss": 2.3671, "step": 2511 }, { "epoch": 0.9759840703236929, "grad_norm": 0.61328125, "learning_rate": 3.424359956126e-07, "loss": 2.3165, "step": 2512 }, { "epoch": 0.9763725990141084, "grad_norm": 0.625, "learning_rate": 3.313067667275238e-07, "loss": 2.3788, "step": 2513 }, { "epoch": 0.976761127704524, "grad_norm": 0.63671875, "learning_rate": 3.2036108871452695e-07, "loss": 2.3492, "step": 2514 }, { "epoch": 0.9771496563949394, "grad_norm": 0.57421875, "learning_rate": 3.0959898173128497e-07, "loss": 2.2973, "step": 2515 }, { "epoch": 0.977538185085355, "grad_norm": 0.61328125, "learning_rate": 2.990204655973994e-07, "loss": 2.4172, "step": 2516 }, { "epoch": 0.9779267137757703, "grad_norm": 0.62890625, "learning_rate": 2.8862555979437544e-07, "loss": 2.3124, "step": 2517 }, { "epoch": 0.9783152424661858, "grad_norm": 0.5859375, "learning_rate": 2.784142834655667e-07, "loss": 2.3208, "step": 2518 }, { "epoch": 0.9787037711566013, "grad_norm": 0.609375, "learning_rate": 2.6838665541616404e-07, "loss": 2.3884, "step": 2519 }, { "epoch": 0.9790922998470168, "grad_norm": 0.59375, "learning_rate": 2.585426941131619e-07, "loss": 2.4062, "step": 2520 }, { "epoch": 0.9794808285374323, "grad_norm": 0.64453125, "learning_rate": 2.488824176852922e-07, "loss": 2.3114, "step": 2521 }, { "epoch": 0.9798693572278478, "grad_norm": 0.62109375, "learning_rate": 2.394058439230129e-07, "loss": 2.3523, "step": 2522 }, { "epoch": 0.9802578859182632, "grad_norm": 0.609375, "learning_rate": 2.3011299027847487e-07, "loss": 2.3356, "step": 2523 }, { "epoch": 0.9806464146086787, "grad_norm": 0.59765625, "learning_rate": 2.210038738654996e-07, "loss": 2.3744, "step": 2524 }, { "epoch": 0.9810349432990942, "grad_norm": 0.59375, "learning_rate": 2.1207851145953473e-07, "loss": 2.2672, "step": 2525 }, { "epoch": 0.9814234719895097, "grad_norm": 0.59375, "learning_rate": 2.033369194975987e-07, "loss": 2.3416, "step": 2526 }, { "epoch": 0.9818120006799252, "grad_norm": 0.62109375, "learning_rate": 1.9477911407831396e-07, "loss": 2.3808, "step": 2527 }, { "epoch": 0.9822005293703406, "grad_norm": 0.58984375, "learning_rate": 1.86405110961807e-07, "loss": 2.3057, "step": 2528 }, { "epoch": 0.9825890580607561, "grad_norm": 0.609375, "learning_rate": 1.7821492556973075e-07, "loss": 2.4372, "step": 2529 }, { "epoch": 0.9829775867511716, "grad_norm": 0.609375, "learning_rate": 1.7020857298521986e-07, "loss": 2.3674, "step": 2530 }, { "epoch": 0.9833661154415871, "grad_norm": 0.640625, "learning_rate": 1.6238606795284662e-07, "loss": 2.3223, "step": 2531 }, { "epoch": 0.9837546441320026, "grad_norm": 0.59375, "learning_rate": 1.547474248786096e-07, "loss": 2.2912, "step": 2532 }, { "epoch": 0.9841431728224181, "grad_norm": 0.625, "learning_rate": 1.4729265782993383e-07, "loss": 2.3836, "step": 2533 }, { "epoch": 0.9845317015128335, "grad_norm": 0.59765625, "learning_rate": 1.400217805355708e-07, "loss": 2.3461, "step": 2534 }, { "epoch": 0.984920230203249, "grad_norm": 0.6875, "learning_rate": 1.3293480638565392e-07, "loss": 2.3769, "step": 2535 }, { "epoch": 0.9853087588936645, "grad_norm": 0.6015625, "learning_rate": 1.2603174843162092e-07, "loss": 2.2917, "step": 2536 }, { "epoch": 0.98569728758408, "grad_norm": 0.61328125, "learning_rate": 1.1931261938621375e-07, "loss": 2.3128, "step": 2537 }, { "epoch": 0.9860858162744955, "grad_norm": 0.6171875, "learning_rate": 1.1277743162345644e-07, "loss": 2.344, "step": 2538 }, { "epoch": 0.986474344964911, "grad_norm": 0.65234375, "learning_rate": 1.0642619717859959e-07, "loss": 2.328, "step": 2539 }, { "epoch": 0.9868628736553264, "grad_norm": 0.59765625, "learning_rate": 1.002589277481536e-07, "loss": 2.3472, "step": 2540 }, { "epoch": 0.9872514023457419, "grad_norm": 0.6484375, "learning_rate": 9.427563468982215e-08, "loss": 2.3188, "step": 2541 }, { "epoch": 0.9876399310361574, "grad_norm": 0.62109375, "learning_rate": 8.847632902250214e-08, "loss": 2.3605, "step": 2542 }, { "epoch": 0.9880284597265729, "grad_norm": 0.64453125, "learning_rate": 8.28610214262393e-08, "loss": 2.3247, "step": 2543 }, { "epoch": 0.9884169884169884, "grad_norm": 0.60546875, "learning_rate": 7.742972224225043e-08, "loss": 2.3225, "step": 2544 }, { "epoch": 0.9888055171074039, "grad_norm": 0.640625, "learning_rate": 7.21824414728789e-08, "loss": 2.3744, "step": 2545 }, { "epoch": 0.9891940457978193, "grad_norm": 0.61328125, "learning_rate": 6.711918878155033e-08, "loss": 2.2812, "step": 2546 }, { "epoch": 0.9895825744882348, "grad_norm": 0.62109375, "learning_rate": 6.223997349282807e-08, "loss": 2.3866, "step": 2547 }, { "epoch": 0.9899711031786503, "grad_norm": 0.65234375, "learning_rate": 5.754480459229106e-08, "loss": 2.377, "step": 2548 }, { "epoch": 0.9903596318690658, "grad_norm": 0.62890625, "learning_rate": 5.303369072664488e-08, "loss": 2.3846, "step": 2549 }, { "epoch": 0.9907481605594813, "grad_norm": 6.3125, "learning_rate": 4.8706640203577406e-08, "loss": 2.2776, "step": 2550 }, { "epoch": 0.9911366892498968, "grad_norm": 0.60546875, "learning_rate": 4.456366099183651e-08, "loss": 2.3427, "step": 2551 }, { "epoch": 0.9915252179403122, "grad_norm": 0.59765625, "learning_rate": 4.0604760721174586e-08, "loss": 2.3697, "step": 2552 }, { "epoch": 0.9919137466307277, "grad_norm": 0.609375, "learning_rate": 3.6829946682348516e-08, "loss": 2.3838, "step": 2553 }, { "epoch": 0.9923022753211432, "grad_norm": 0.58984375, "learning_rate": 3.3239225827086386e-08, "loss": 2.2705, "step": 2554 }, { "epoch": 0.9926908040115587, "grad_norm": 0.61328125, "learning_rate": 2.983260476808747e-08, "loss": 2.3065, "step": 2555 }, { "epoch": 0.9930793327019742, "grad_norm": 0.63671875, "learning_rate": 2.6610089779044445e-08, "loss": 2.3795, "step": 2556 }, { "epoch": 0.9934678613923897, "grad_norm": 0.61328125, "learning_rate": 2.3571686794543467e-08, "loss": 2.3717, "step": 2557 }, { "epoch": 0.9938563900828051, "grad_norm": 0.62890625, "learning_rate": 2.0717401410164096e-08, "loss": 2.3575, "step": 2558 }, { "epoch": 0.9942449187732206, "grad_norm": 0.578125, "learning_rate": 1.8047238882379357e-08, "loss": 2.3786, "step": 2559 }, { "epoch": 0.9946334474636361, "grad_norm": 0.61328125, "learning_rate": 1.556120412857798e-08, "loss": 2.4276, "step": 2560 }, { "epoch": 0.9950219761540516, "grad_norm": 0.66015625, "learning_rate": 1.3259301727075458e-08, "loss": 2.3399, "step": 2561 }, { "epoch": 0.9954105048444671, "grad_norm": 0.609375, "learning_rate": 1.1141535917069678e-08, "loss": 2.3158, "step": 2562 }, { "epoch": 0.9957990335348826, "grad_norm": 0.61328125, "learning_rate": 9.207910598674208e-09, "loss": 2.317, "step": 2563 }, { "epoch": 0.996187562225298, "grad_norm": 0.6171875, "learning_rate": 7.458429332862781e-09, "loss": 2.3655, "step": 2564 }, { "epoch": 0.9965760909157135, "grad_norm": 0.66015625, "learning_rate": 5.893095341502619e-09, "loss": 2.3816, "step": 2565 }, { "epoch": 0.996964619606129, "grad_norm": 0.66796875, "learning_rate": 4.511911507321109e-09, "loss": 2.2994, "step": 2566 }, { "epoch": 0.9973531482965445, "grad_norm": 0.59375, "learning_rate": 3.31488037392802e-09, "loss": 2.3347, "step": 2567 }, { "epoch": 0.99774167698696, "grad_norm": 0.58984375, "learning_rate": 2.3020041457821885e-09, "loss": 2.2885, "step": 2568 }, { "epoch": 0.9981302056773755, "grad_norm": 0.671875, "learning_rate": 1.4732846882137274e-09, "loss": 2.3607, "step": 2569 }, { "epoch": 0.9985187343677909, "grad_norm": 0.6328125, "learning_rate": 8.287235273907179e-10, "loss": 2.3445, "step": 2570 }, { "epoch": 0.9989072630582064, "grad_norm": 0.62890625, "learning_rate": 3.683218503636177e-10, "loss": 2.3514, "step": 2571 }, { "epoch": 0.9992957917486219, "grad_norm": 0.58984375, "learning_rate": 9.208050498754617e-11, "loss": 2.3164, "step": 2572 }, { "epoch": 0.9996843204390374, "grad_norm": 0.6171875, "learning_rate": 0.0, "loss": 2.3925, "step": 2573 }, { "epoch": 0.9996843204390374, "step": 2573, "total_flos": 1.1741952995733012e+19, "train_loss": 2.4089672912015, "train_runtime": 234538.8638, "train_samples_per_second": 1.405, "train_steps_per_second": 0.011 } ], "logging_steps": 1, "max_steps": 2573, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1741952995733012e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }